{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import sqlite3\n",
    "import sys\n",
    "import pickle\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "TP_file = 'train_triplets.txt'\n",
    "md_dbfile = 'track_metadata.db'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tp = pd.read_table(TP_file, header=None, names=['uid', 'sid', 'count'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "MIN_USER_COUNT = 20\n",
    "MIN_SONG_COUNT = 50"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Keep play counts that involve only usable tracks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# tid2sid.json contains a mapping between track id and song id, which can obtained from track_metadata.db\n",
    "with open('tid2sid.json', 'r') as f:\n",
    "    tid2sid = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bad_audio = []\n",
    "\n",
    "with open('tracks_bad_audio.txt', 'r') as f:\n",
    "    for line in f:\n",
    "        bad_audio.append(line.strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bad_sid = [tid2sid[k] for k in bad_audio]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def filter_usable_tracks(tp, bad_sid):\n",
    "    return tp[~tp['sid'].isin(bad_sid)]\n",
    "\n",
    "tp_good = filter_usable_tracks(tp, bad_sid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "46968690 playcount triplets are kept out of 48373586\n"
     ]
    }
   ],
   "source": [
    "print '%d playcount triplets are kept out of %d'% (len(tp_good), len(tp))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Further filter out counts invoving inactive users & songs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_count(tp, id):\n",
    "    playcount_groupbyid = tp[[id, 'count']].groupby(id, as_index=False)\n",
    "    count = playcount_groupbyid.size()\n",
    "    return count\n",
    "\n",
    "def remove_inactive(tp, min_uc=MIN_USER_COUNT, min_sc=MIN_SONG_COUNT):\n",
    "    # Only keep the triplets for songs which were listened to by at least min_sc users. \n",
    "    songcount = get_count(tp, 'sid')\n",
    "    tp = tp[tp['sid'].isin(songcount.index[songcount >= min_sc])]\n",
    "    \n",
    "    # Only keep the triplets for users who listened to at least min_uc songs\n",
    "    # After doing this, some of the songs will have less than min_uc users, but should only be a small proportion\n",
    "    usercount = get_count(tp, 'uid')\n",
    "    tp = tp[tp['uid'].isin(usercount.index[usercount >= min_uc])]\n",
    "    \n",
    "    # Update both usercount and songcount after filtering\n",
    "    usercount, songcount = get_count(tp, 'uid'), get_count(tp, 'sid') \n",
    "    return tp, usercount, songcount"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "tp, usercount, songcount = remove_inactive(tp_good)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "After filtering, there are 38226302 triplets from 613682 users and 97414 songs (sparsity level 0.064%)\n"
     ]
    }
   ],
   "source": [
    "sparsity_level = float(tp.shape[0]) / (usercount.shape[0] * songcount.shape[0])\n",
    "print \"After filtering, there are %d triplets from %d users and %d songs (sparsity level %.3f%%)\" % (tp.shape[0], \n",
    "                                                                                                      usercount.shape[0], \n",
    "                                                                                                      songcount.shape[0], \n",
    "                                                                                                      sparsity_level * 100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes.AxesSubplot at 0x7e2a61d0>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD9CAYAAAB9YErCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9Qk3e+L/D3Y0N/uCoBdgmasI1CUCMUWCUws+s9dpCI\n1gU7dECmBdymOzt63dqzTtf2dl3t6qm6o3O1rux0t+wIeMfUoaOkcwRhPWVxOmNYLWxdWWvsASUh\n0CMmSBUF5Hv/iM9dLsdqjVjC1/dr5juT55s8j983wXzm+TwJUYQQICIiCsak8V4AERFNXCwiREQU\nNBYRIiIKGosIEREFjUWEiIiCxiJCRERBu2sRuXHjxpPp6enOlJSUFrPZ3Prmm29uA4DNmzdvNhgM\n7tTU1ObU1NTmmpqapeo+27Zte9NkMrnmzJlzrq6uzqrOnz59en5SUtIZk8nkWrdu3R51/ubNm08U\nFBR8YDKZXBkZGScvXrz4tHpfeXl5SUJCwvmEhITzFRUVxWMbnYiIHpgQ4q7j2rVrk4UQGBwc1KSn\np588ceLEjzZv3rxp165dvxj92LNnz5qTk5NbBgYGwtra2oxxcXEXhoeHFSEE0tLSmpxOp0UIgaVL\nlx6tqanJFkJg3759a1avXl0qhIDdbi8oKCiwCyHQ09MTOWvWrC98Pp/W5/Np1dv3Wi8HBwcHx7c3\n7tnOmjx58nUAGBgYePzWrVuPRURE+G4XH2X0Y6urq3MLCwsPhoWFDRqNxvb4+PgLTqcz3ev1Tu/r\n65tqsViaAKC4uLjiyJEjKwDA4XDklJSUlANAXl7eh8ePH88EgGPHji2xWq11Wq3Wr9Vq/VlZWfW1\ntbXZY1c+iYjoQd2ziAwPD09KSUlp0el03c8+++zH8+bNOwsAe/fu/XlycvLfbDZbmd/v1wJAZ2fn\nDIPB4Fb3NRgMbo/Hox89r9frPR6PRw8AHo9HHxsb2wEAGo1mKDw8vLenpyfq6441dtGJiOhBae71\ngEmTJg23tLSk9Pb2hi9ZsuRYQ0PDotWrV//+17/+9W8AYOPGjVvWr1+/q6yszPbwl/vfKYrCv9tC\nRBSEO3WU7tc3fndWeHh473PPPffvp06dWhAdHf2loihCURTxyiuvvN/U1GQBAmcYHR0dseo+brfb\nYDAY3Hq93uN2uw2j59V9Ll269H0AGBoa0vT29oZHRUX1jD5WR0dH7Mgzk5GEEIqsY9OmTW+P9xqY\nj9mYT75x/+Xizu5aRC5fvvxdtVXV39//VH19fVZqampzV1dXjPqYw4cPP5+UlHQGAHJychx2u33l\nwMDA421tbTNdLpfJYrE0xcTEdE2bNu2q0+lMF0IolZWVRbm5udXqPuXl5SUAUFVV9UJmZuZxALBa\nrXV1dXVWv9+v9fl8EfX19VlLliw5NlbBJ4r29nbjeK/hYZI5n8zZAOajgLu2s7xe7/SSkpLy4eHh\nScPDw5OKiooqMzMzjxcXF1e0tLSkKIoiZs6c2fbee+/9DADMZnNrfn7+IbPZ3KrRaIZKS0vXqO2m\n0tLSNatWrdrf39//1LJly45mZ2fXAoDNZisrKiqqNJlMrqioqB673b4SACIjI69s3LhxS1pa2l8B\nYNOmTW9rtVr/ndb5t7/9LTkvr+TDoSHxGAAsXvwvf37//Xd/OnY/JiIiuiMRAm8Re5ABQDgcjh9P\nmbKwD2gRQKWYPdtybrzXNVbj448/XjTea2A+ZmM++Ubg5f/Bj6PcPtiEpSiKcDgcOUVFf6js7f0o\nHHBi9uxXPz93zjlnvNdGRBSqFEURYgyujfDPnoS4hoaGReO9hodJ5nwyZwOYjwJYRIiIKGgsIiFu\n0aJFDeO9hodJ5nwyZwOYjwJYRIiIKGgsIiFO9r6szPlkzgYwHwWwiBARUdBYREKc7H1ZmfPJnA1g\nPgpgESEioqCxiIQ42fuyMueTORvAfB
TAIkJEREFjEQlxsvdlZc4nczaA+SiARYSIiILGIhLiZO/L\nypxP5mwA81EAiwgREQWNRSTEyd6XlTmfzNkA5qMAFhEiIgoai0iIk70vK3M+mbMBzEcBLCJERBQ0\nFpEQJ3tfVuZ8MmcDmI8CWESIiChoLCIhTva+rMz5ZM4GMB8FsIgQEVHQ7lpEbty48WR6erozJSWl\nxWw2t7755pvbAODKlSuRWVlZ9QkJCeetVmud3+/Xqvts27btTZPJ5JozZ865uro6qzp/+vTp+UlJ\nSWdMJpNr3bp1e9T5mzdvPlFQUPCByWRyZWRknLx48eLT6n3l5eUlCQkJ5xMSEs5XVFQUj230iUH2\nvqzM+WTOBjAf3SaEuOu4du3aZCEEBgcHNenp6SdPnDjxo9dff/23O3bs+KUQAtu3b9+wYcOG7UII\nnD171pycnNwyMDAQ1tbWZoyLi7swPDysCCGQlpbW5HQ6LUIILF269GhNTU22EAL79u1bs3r16lIh\nBOx2e0FBQYFdCIGenp7IWbNmfeHz+bQ+n0+r3h69PgDC4XD8ODx8uR8QAjgpZs+2nLtXLg4ODo5H\neQRe/h/8OPdsZ02ePPk6AAwMDDx+69atxyIiInwOhyOnpKSkHABKSkrKjxw5sgIAqqurcwsLCw+G\nhYUNGo3G9vj4+AtOpzPd6/VO7+vrm2qxWJoAoLi4uELdZ+Sx8vLyPjx+/HgmABw7dmyJ1Wqt02q1\nfq1W68/Kyqqvra3NHvsyGtpk78vKnE/mbADzUYDmXg8YHh6e9IMf/ODTL774Im716tW/nzdv3tnu\n7m6dTqfrBgCdTtfd3d2tA4DOzs4ZGRkZJ9V9DQaD2+Px6MPCwgYNBoNbndfr9R6Px6MHAI/Ho4+N\nje0AAI1GMxQeHt7b09MT1dnZOWPkPuqx7rTG3bt3r7txw/0ksBlAL65fvzpZvU/9RVBPTSfadktL\nS0oorYf5uM3tibnd0NCwaP/+/asAwGg0tmOsfNNTFr/fH56enn7yP/7jP57VarW+kfdFRERcEUJg\n7dq1ew8cOPCiOm+z2d6vqqrKO3Xq1PzFixfXq/ONjY0Lly9f/pEQAomJiWc8Hs8M9b64uLgLly9f\njtq5c+f6rVu3vqXOb9my5Vc7d+5cf6dTMrazODg4OO5v4NtqZ6nCw8N7n3vuuX8/ffr0fJ1O193V\n1RUDAF6vd3p0dPSXQOAMo6OjI1bdx+12GwwGg1uv13vcbrdh9Ly6z6VLl74PAENDQ5re3t7wqKio\nntHH6ujoiB15ZkJEROPvrkXk8uXL31XfedXf3/9UfX19VmpqanNOTo6jvLy8BAi8g2rFihVHACAn\nJ8dht9tXDgwMPN7W1jbT5XKZLBZLU0xMTNe0adOuOp3OdCGEUllZWZSbm1ut7qMeq6qq6oXMzMzj\nAGC1Wuvq6uqsfr9f6/P5Iurr67OWLFly7GH+MEKRejoqK5nzyZwNYD4KuOs1Ea/XO72kpKR8eHh4\n0vDw8KSioqLKzMzM46mpqc35+fmHysrKbEajsf3QoUP5AGA2m1vz8/MPmc3mVo1GM1RaWrpGURQB\nAKWlpWtWrVq1v7+//6lly5Ydzc7OrgUAm81WVlRUVGkymVxRUVE9drt9JQBERkZe2bhx45a0tLS/\nAsCmTZve1mq1/of74yAiovuh3O6NTViKogiHw5FTVPSHyt7ej8IBJ2bPfvXzc+ecc8Z7bUREoUpR\nFCGEUB70OPzEOhERBY1FJMTJ3peVOZ/M2QDmowAWESIiChqLSIhTPzQkK5nzyZwNYD4KYBEhIqKg\nsYiEONn7sjLnkzkbwHwUwCJCRERBYxEJcbL3ZWXOJ3M2gPkogEWEiIiCxiIS4mTvy8qcT+ZsAPNR\nAIsIEREFjUUkxMnel5U5n8zZAOajABYRIiIKGotIiJO9LytzPpmzAcxHASwiREQUNBaRECd7X1bm\nfD
JnA5iPAlhEiIgoaCwiIU72vqzM+WTOBjAfBbCIEBFR0FhEQpzsfVmZ88mcDWA+CmARISKioLGI\nhDjZ+7Iy55M5G8B8FMAiQkREQbtrEeno6Ih99tlnP543b97ZxMTEv7/77ruvAsDmzZs3GwwGd2pq\nanNqampzTU3NUnWfbdu2vWkymVxz5sw5V1dXZ1XnT58+PT8pKemMyWRyrVu3bo86f/PmzScKCgo+\nMJlMroyMjJMXL158Wr2vvLy8JCEh4XxCQsL5ioqK4rGNPjHI3peVOZ/M2QDmo9uEEF87vF5vTHNz\nc4oQAn19fVMSEhI+b21tnbt58+ZNu3bt+sXox589e9acnJzcMjAwENbW1maMi4u7MDw8rAghkJaW\n1uR0Oi1CCCxduvRoTU1NthAC+/btW7N69epSIQTsdntBQUGBXQiBnp6eyFmzZn3h8/m0Pp9Pq94e\n/W8CEA6H48fh4cv9gBDASTF7tuXc3XJxcHBwPOoj8PL/4Me565lITExMV0pKSgsATJky5au5c+f+\nw+Px6G//68rox1dXV+cWFhYeDAsLGzQaje3x8fEXnE5nutfrnd7X1zfVYrE0AUBxcXHFkSNHVgCA\nw+HIKSkpKQeAvLy8D48fP54JAMeOHVtitVrrtFqtX6vV+rOysupra2uzx658Tgyy92VlzidzNoD5\nKEDzTR/Y3t5ubG5uTs3IyDj5ySef/HDv3r0/r6ioKF6wYMGpXbt2rddqtf7Ozs4ZGRkZJ9V9DAaD\n2+Px6MPCwgYNBoNbndfr9R61GHk8Hn1sbGwHAGg0mqHw8PDenp6eqM7Ozhkj91GPdae17d69e92N\nG+4ngc0AenH9+tXJ6n3qL4J6ajrRtltaWlJCaT3Mx21uT8zthoaGRfv3718FAEajsR1j5ZucrvT1\n9U2ZP3/+qcOHD68QQqC7uzt6eHhYGR4eVt56662tL7/8cpkQAmvXrt174MCBF9X9bDbb+1VVVXmn\nTp2av3jx4np1vrGxceHy5cs/EkIgMTHxjMfjmaHeFxcXd+Hy5ctRO3fuXL9169a31PktW7b8aufO\nnevvdErGdhYHBwfH/Q18G+0sABgcHAzLy8v78KWXXjqwYsWKIwAQHR39paIoQlEU8corr7zf1NRk\nAQJnGB0dHbHqvm6322AwGNx6vd7jdrsNo+fVfS5duvR9ABgaGtL09vaGR0VF9Yw+VkdHR+zIMxMi\nIhp/dy0iQgjFZrOVmc3m1tdee223Ou/1eqertw8fPvx8UlLSGQDIyclx2O32lQMDA4+3tbXNdLlc\nJovF0hQTE9M1bdq0q06nM10IoVRWVhbl5uZWq/uUl5eXAEBVVdULmZmZxwHAarXW1dXVWf1+v9bn\n80XU19dnLVmy5NjD+CGEMvV0VFYy55M5G8B8FHDXayKffPLJDw8cOPDSM88881lqamozALzzzjv/\n6+DBg4UtLS0piqKImTNntr333ns/AwCz2dyan59/yGw2t2o0mqHS0tI1iqIIACgtLV2zatWq/f39\n/U8tW7bsaHZ2di0A2Gy2sqKiokqTyeSKiorqsdvtKwEgMjLyysaNG7ekpaX9FQA2bdr0tlar9T/M\nHwYREd0f5XZvbMJSFEU4HI6coqI/VPb2fhQOODF79qufnzvnnDPeayMiClWKoghxh3fZ3i9+Yp2I\niILGIhLiZO/LypxP5mwA81EAiwgREQWNRSTEqR8akpXM+WTOBjAfBbCIEBFR0FhEQpzsfVmZ88mc\nDWA+CmARISKioLGIhDjZ+7Iy55M5G8B8FMAiQkREQWMRCXGy92VlzidzNoD5KIBFhIiIgsYiEuJk\n78vKnE/mbADzUQCLCBERBY1FJMTJ3peVOZ/M2QDmowAWESIiChqLSIiTvS8rcz6ZswHMRwEsIkRE\nFDQWkRAne19W5nwyZwOYjwJYRIiIKGgsIiFO9r6szPlkzgYwHwWw
iBARUdBYREKc7H1ZmfPJnA1g\nPgpgESEioqDdtYh0dHTEPvvssx/PmzfvbGJi4t/ffffdVwHgypUrkVlZWfUJCQnnrVZrnd/v16r7\nbNu27U2TyeSaM2fOubq6Oqs6f/r06flJSUlnTCaTa926dXvU+Zs3bz5RUFDwgclkcmVkZJy8ePHi\n0+p95eXlJQkJCecTEhLOV1RUFI9t9IlB9r6szPlkzgYwH90mhPja4fV6Y5qbm1OEEOjr65uSkJDw\neWtr69zXX3/9tzt27PilEALbt2/fsGHDhu1CCJw9e9acnJzcMjAwENbW1maMi4u7MDw8rAghkJaW\n1uR0Oi1CCCxduvRoTU1NthAC+/btW7N69epSIQTsdntBQUGBXQiBnp6eyFmzZn3h8/m0Pp9Pq94e\nvUYAwuFw/Dg8fLkfEAI4KWbPtpy7Wy4ODg6OR30EXv4f/Dh3PROJiYnpSklJaQGAKVOmfDV37tx/\neDwevcPhyCkpKSkHgJKSkvIjR46sAIDq6urcwsLCg2FhYYNGo7E9Pj7+gtPpTPd6vdP7+vqmWiyW\nJgAoLi6uUPcZeay8vLwPjx8/ngkAx44dW2K1Wuu0Wq1fq9X6s7Ky6mtra7MfTikNXbL3ZWXOJ3M2\ngPkoQPNNH9je3m5sbm5OTU9Pd3Z3d+t0Ol03AOh0uu7u7m4dAHR2ds7IyMg4qe5jMBjcHo9HHxYW\nNmgwGNzqvF6v93g8Hj0AeDwefWxsbAcAaDSaofDw8N6enp6ozs7OGSP3UY91p7Xt3r173Y0b7ieB\nzQB6cf361cnqfeovgnpqOtG2W1paUkJpPczHbW5PzO2GhoZF+/fvXwUARqOxHWPlm5yu9PX1TfnB\nD35w+vDhwyuEENBqtb6R90dERFwRQmDt2rV7Dxw48KI6b7PZ3q+qqso7derU/MWLF9er842NjQuX\nL1/+kRACiYmJZzwezwz1vri4uAuXL1+O2rlz5/qtW7e+pc5v2bLlVzt37lx/p1MytrM4ODg47m/g\n22hnAcDg4GBYXl7eh0VFRZUrVqw4AgTOPrq6umIAwOv1To+Ojv4SCJxhdHR0xKr7ut1ug8FgcOv1\neo/b7TaMnlf3uXTp0vcBYGhoSNPb2xseFRXVM/pYHR0dsSPPTIiIaPzdtYgIIRSbzVZmNptbX3vt\ntd3qfE5OjqO8vLwECLyDSi0uOTk5DrvdvnJgYODxtra2mS6Xy2SxWJpiYmK6pk2bdtXpdKYLIZTK\nysqi3Nzc6tHHqqqqeiEzM/M4AFit1rq6ujqr3+/X+ny+iPr6+qwlS5Yce1g/iFClno7KSuZ8MmcD\nmI8C7npN5JNPPvnhgQMHXnrmmWc+S01NbQYCb+F94403tufn5x8qKyuzGY3G9kOHDuUDgNlsbs3P\nzz9kNptbNRrNUGlp6RpFUQQAlJaWrlm1atX+/v7+p5YtW3Y0Ozu7FgBsNltZUVFRpclkckVFRfXY\n7faVABAZGXll48aNW9LS0v4KAJs2bXpbq9X6H+YPg4iI7o9yuzc2YSmKIhwOR05R0R8qe3s/Cgec\nmD371c/PnXPOGe+1ERGFKkVRhBBCedDj8BPrREQUNBaRECd7X1bmfDJnA5iPAlhEiIgoaCwiIU79\n0JCsZM4nczaA+SiARYSIiILGIhLiZO/LypxP5mwA81EAiwgREQWNRSTEyd6XlTmfzNkA5qMAFhEi\nIgoai0iIk70vK3M+mbMBzEcBLCJERBQ0FpEQJ3tfVuZ8MmcDmI8CWESIiChoLCIhTva+rMz5ZM4G\nMB8FsIgQEVHQWERCnOx9WZnzyZwNYD4KYBEhIqKgsYiEONn7sjLnkzkbwHwUwCJCRERBYxEJcbL3\nZWXOJ3M2gPkogEWEiIiCxiIS4mTvy8qcT+ZsAPNRAIsIEREF7Z5F5OWXX/6TTqfrTkpKOqPObd68\nebPBYHCnpqY2p6amNtfU1CxV
79u2bdubJpPJNWfOnHN1dXVWdf706dPzk5KSzphMJte6dev2qPM3\nb958oqCg4AOTyeTKyMg4efHixafV+8rLy0sSEhLOJyQknK+oqCgem8gTi+x9WZnzyZwNYD66TQhx\n19HY2Ljw008/TU1MTDyjzm3evHnTrl27fjH6sWfPnjUnJye3DAwMhLW1tRnj4uIuDA8PK0IIpKWl\nNTmdTosQAkuXLj1aU1OTLYTAvn371qxevbpUCAG73V5QUFBgF0Kgp6cnctasWV/4fD6tz+fTqrdH\n/5sAhMPh+HF4+HI/IARwUsyebTl3r1wcHBwcj/IIvPw/+HHueSaycOHCExEREb47FB9l9Fx1dXVu\nYWHhwbCwsEGj0dgeHx9/wel0pnu93ul9fX1TLRZLEwAUFxdXHDlyZAUAOByOnJKSknIAyMvL+/D4\n8eOZAHDs2LElVqu1TqvV+rVarT8rK6u+trY2+0EK5kQke19W5nwyZwOYjwI0we64d+/en1dUVBQv\nWLDg1K5du9ZrtVp/Z2fnjIyMjJPqYwwGg9vj8ejDwsIGDQaDW53X6/Uej8ejBwCPx6OPjY3tAACN\nRjMUHh7e29PTE9XZ2Tlj5D7qse60lt27d6+7ccP9JLAZQC+uX786Wb1P/UVQT00n2nZLS0tKKK2H\n+bjN7Ym53dDQsGj//v2rAMBoNLZjrHyT05W2tjbjyHZWd3d39PDwsDI8PKy89dZbW19++eUyIQTW\nrl2798CBAy+qj7PZbO9XVVXlnTp1av7ixYvr1fnGxsaFy5cv/0gIgcTExDMej2eGel9cXNyFy5cv\nR+3cuXP91q1b31Lnt2zZ8qudO3euv9MpGdtZHBwcHPc38G21s+4kOjr6S0VRhKIo4pVXXnm/qanJ\nAgTOMDo6OmLVx7ndboPBYHDr9XqP2+02jJ5X97l06dL3AWBoaEjT29sbHhUV1TP6WB0dHbEjz0yI\niGj8BVVEvF7vdPX24cOHn1ffuZWTk+Ow2+0rBwYGHm9ra5vpcrlMFoulKSYmpmvatGlXnU5nuhBC\nqaysLMrNza1W9ykvLy8BgKqqqhcyMzOPA4DVaq2rq6uz+v1+rc/ni6ivr89asmTJsQePPLGop6Oy\nkjmfzNkA5qOAe14TKSwsPPiXv/zlXy5fvvzd2NjYjrfffntTQ0PDopaWlhRFUcTMmTPb3nvvvZ8B\ngNlsbs3Pzz9kNptbNRrNUGlp6RpFUQQAlJaWrlm1atX+/v7+p5YtW3Y0Ozu7FgBsNltZUVFRpclk\nckVFRfXY7faVABAZGXll48aNW9LS0v4KAJs2bXpbq9X6H96PgoiI7pdyuzc2YSmKIhwOR05R0R8q\ne3s/CgecmD371c/PnXPOGe+1ERGFKkVRhLjDu2zvFz+xTkREQWMRCXGy92VlzidzNoD5KIBFhIiI\ngsYiEuLUDw3JSuZ8MmcDmI8CWESIiChoLCIhTva+rMz5ZM4GMB8FsIgQEVHQWERCnOx9WZnzyZwN\nYD4KYBEhIqKgsYiEONn7sjLnkzkbwHwUwCJCRERBYxEJcbL3ZWXOJ3M2gPkoQMoi4nL9zaR+38m0\naZFXx3s9RESykrKIDA/fnAQIAAJ9fb6p472eByF7X1bmfDJnA5iPAqQsIkRE9O2Q8vtEgAwEzkQA\nQMFY/M18IiKZ8PtEiIho3LGIhDjZ+7Iy55M5G8B8FMAiQkREQeM1ESKiRxCviRAR0bhjEQlxsvdl\nZc4nczaA+SiARYSIiIJ2zyLy8ssv/0mn03UnJSWdUeeuXLkSmZWVVZ+QkHDearXW+f1+rXrftm3b\n3jSZTK45c+acq6urs6rzp0+fnp+UlHTGZDK51q1bt0edv3nz5hMFBQUfmEwmV0ZGxsmLFy8+rd5X\nXl5ekpCQcD4hIeF8RUVF8dhEnlhk//s9MueTORvAfHSbEOKuo7GxceGnn36ampiYeEade/3113
+7\nY8eOXwohsH379g0bNmzYLoTA2bNnzcnJyS0DAwNhbW1txri4uAvDw8OKEAJpaWlNTqfTIoTA0qVL\nj9bU1GQLIbBv3741q1evLhVCwG63FxQUFNiFEOjp6YmcNWvWFz6fT+vz+bTq7dHrAyAcDsePw8OX\n+wEhgJOBv3cCcXtA3CsjBwcHx6M2xuq18Z5nIgsXLjwRERHhGznncDhySkpKygGgpKSk/MiRIysA\noLq6OrewsPBgWFjYoNFobI+Pj7/gdDrTvV7v9L6+vqkWi6UJAIqLiyvUfUYeKy8v78Pjx49nAsCx\nY8eWWK3WOq1W69dqtf6srKz62tra7LEqnhOF7H1ZmfPJnA1gPgrQBLNTd3e3TqfTdQOATqfr7u7u\n1gFAZ2fnjIyMjJPq4wwGg9vj8ejDwsIGDQaDW53X6/Uej8ejBwCPx6OPjY3tAACNRjMUHh7e29PT\nE9XZ2Tlj5D7qse60nt27d6+7ccP9JLAZQO9/u7+hoWGRemqq/mJMlO2WlpaUUFoP83Gb2xNzu6Gh\nYdH+/ftXAYDRaGzHWPkmpyttbW3Gke0srVbrG3l/RETEFSEE1q5du/fAgQMvqvM2m+39qqqqvFOn\nTs1fvHhxvTrf2Ni4cPny5R8JIZCYmHjG4/HMUO+Li4u7cPny5aidO3eu37p161vq/JYtW361c+fO\n9Xc6JWM7i4ODg+P+xli9Ngb17iydTtfd1dUVAwBer3d6dHT0l0DgDKOjoyNWfZzb7TYYDAa3Xq/3\nuN1uw+h5dZ9Lly59HwCGhoY0vb294VFRUT2jj9XR0RE78syEiIjGX1BFJCcnx1FeXl4CBN5BtWLF\niiPqvN1uXzkwMPB4W1vbTJfLZbJYLE0xMTFd06ZNu+p0OtOFEEplZWVRbm5u9ehjVVVVvZCZmXkc\nAKxWa11dXZ3V7/drfT5fRH19fdaSJUuOjU3siUM9HZWVzPlkzgYwHwXc85pIYWHhwb/85S//cvny\n5e/GxsZ2/OY3v/n1G2+8sT0/P/9QWVmZzWg0th86dCgfAMxmc2t+fv4hs9ncqtFohkpLS9coiiIA\noLS0dM2qVav29/f3P7Vs2bKj2dnZtQBgs9nKioqKKk0mkysqKqrHbrevBIDIyMgrGzdu3JKWlvZX\nANi0adPbWq3W//B+FEREdL/4t7OIiB5B/NtZREQ07lhEQpzsfVmZ88mcDWA+CmARISKioPGaCBHR\nI4jXRIiIaNyxiIQ42fuyMueTORvAfBTAIkJEREHjNREiokcQr4kQEdG4YxEJcbL3ZWXOJ3M2gPko\ngEWEiIhP0q1GAAAMXklEQVSCxmsiRESPIF4TISKiccciEuJk78vKnE/mbADzUQCLCBERBY3XRIiI\nHkG8JkJEROOORSTEyd6XlTmfzNkA5qMAFhEiIgraI3BNJAzAEABg6tSIvqtXr0wbp6USEYWMsbom\nohmLxYS2IagFpa9PmTq+ayEikgvbWSFO9r6szPlkzgYwHwWwiBARUdAeqIgYjcb2Z5555rPU1NRm\ni8XSBABXrlyJzMrKqk9ISDhvtVrr/H6/Vn38tm3b3jSZTK45c+acq6urs6rzp0+fnp+UlHTGZDK5\n1q1bt0edv3nz5hMFBQUfmEwmV0ZGxsmLFy8+/SDrnYgWLVrUMN5reJhkzidzNoD5KOCBioiiKKKh\noWFRc3NzalNTkwUAtm/f/kZWVlb9+fPnEzIzM49v3779DQBobW01f/DBBwWtra3m2tra7DVr1pSq\nF3VWr179+7KyMpvL5TK5XC5TbW1tNgCUlZXZoqKielwul+lf//Vf//eGDRt2PGhgIiIaOw/czhp9\ndd/hcOSUlJSUA0BJSUn5kSNHVgBAdXV1bmFh4cGwsLBBo9HYHh8ff8HpdKZ7vd7pfX19U9UzmeLi\n4gp1n5HHysvL+/D48eOZD7reiUb2vqzM+WTOBjAfBTzQu7
MURRGLFy/+82OPPXbrZz/72Xs//elP\n/9jd3a3T6XTdAKDT6bq7u7t1ANDZ2TkjIyPjpLqvwWBwezwefVhY2KDBYHCr83q93uPxePQA4PF4\n9LGxsR0AoNFohsLDw3uvXLkSGRkZeWXkOnbv3r3uxg33k8BmAL13WGkDgEWBW7d/MdRT1VDfbmlp\nSQml9TAft7k9MbcbGhoW7d+/fxUQuBSBsSKECHp0dnZOF0Lgyy+//F5ycnJLY2PjQq1W6xv5mIiI\niCtCCKxdu3bvgQMHXlTnbTbb+1VVVXmnTp2av3jx4np1vrGxceHy5cs/EkIgMTHxjMfjmaHeFxcX\nd6Gnpydy5PEBCIfD8ePw8OV+QAjgpAAgArfFf7v9IHk5ODg4ZBlj9Xr4QO2s6dOnewHge9/73n89\n//zzh5uamiw6na67q6srBgC8Xu/06OjoL4HAGUZHR0esuq/b7TYYDAa3Xq/3uN1uw+h5dZ9Lly59\nHwCGhoY0vb294aPPQoiIaPwEXUSuX78+ua+vbyoAXLt27Tt1dXXWpKSkMzk5OY7y8vISACgvLy9Z\nsWLFEQDIyclx2O32lQMDA4+3tbXNdLlcJovF0hQTE9M1bdq0q06nM10IoVRWVhbl5uZWq/uox6qq\nqnohMzPz+INHnljU01FZyZxP5mwA81FA0NdEuru7dc8///xhIHCW8OKLL/4fq9Vat2DBglP5+fmH\nysrKbEajsf3QoUP5AGA2m1vz8/MPmc3mVo1GM1RaWrpGURQBAKWlpWtWrVq1v7+//6lly5Ydzc7O\nrgUAm81WVlRUVGkymVxRUVE9drt95ViEJiKisfEI/O0sBfxuESKi/x+/T4SIiMYdi0iIk70vK3M+\nmbMBzEcBLCJERBQ0XhMhInoE8ZoIERGNu0esiGigKIpQx7RpkVfHe0X3IntfVuZ8MmcDmI8CHoFv\nNhzpn99yCPCbDomIHtQjd01kZBHhNRIielTxmggREY07FpEQJ3tfVuZ8MmcDmI8CWESIiChovCbC\nayJE9AjiNREiIhp3LCIhTva+rMz5ZM4GMB8FPOJF5J8fPpwIHzwkIgo1j/w1Ef5dLSJ6FPGaCBER\njTsWkRAne19W5nwyZwOYjwJYRP4fXh8hIrpfvCbC7x0hokcQr4kQEdG4YxG5o9Bpbcnel5U5n8zZ\nAOajgJAvIrW1tdlz5sw5ZzKZXDt27Njw7fyr6veOCPT1+cb1O0daWlpSxvPff9hkzidzNoD5KCCk\ni8itW7ceW7t27e9qa2uzW1tbzQcPHiz8xz/+MffbXcX4npX4/X7tt/1vfptkzidzNoD5KCCki0hT\nU5MlPj7+gtFobA8LCxtcuXKlvbq6OvfbXcXIs5K+qf/8et3HJ9TX7BIRPQwhXUQ8Ho8+Nja2Q902\nGAxuj8ejH/24SZMmDd+82fREePiPe7/znfXXHt6K/llQgEHcq7h8s9t3L0Lt7e3Gh5dn/MmcT+Zs\nAPPRbUKIkB1VVVV5r7zyyh/V7crKypfWrl27d+Rj8M9XdQ4ODg6O+xhj8TqtQQjT6/Wejo6OWHW7\no6Mj1mAwuEc+hp/nICIaPyHdzlqwYMEpl8tlam9vNw4MDDz+wQcfFOTk5DjGe11ERBQQ0mciGo1m\n6He/+93aJUuWHLt169ZjNputbO7cuf8Y73UREdFt433dI9hRU1OTPXv27HPx8fGu7du3bxjv9QQ7\nnn766fakpKTPUlJSmtPS0pqEEOjp6YlcvHhxvclkOp+VlVXn8/m06uPfeeedN+Pj412zZ88+d+zY\nMet4r3/0+MlPfvKn6Ojo7sTExDPqXDB5Tp06NT8xMfFMfHy869VXX90z3rnulm/Tpk2b9Xq9OyUl\npTklJaX56NGjSydivkuXLsUuWrToY7PZfHbevHl/37Nnz6syPX9fl0+W56+/v/9Ji8XiTE5Obpk7\nd27rG2+8se3beP7GPX
gwY2ho6LG4uLgLbW1txoGBgbDk5OSW1tbWueO9rmCG0Whs6+npiRw59/rr\nr/92x44dvxRCYPv27Rs2bNiwXQiBs2fPmpOTk1sGBgbC2trajHFxcRdu3bo1abwzjByNjY0LP/30\n09SRL7L3k2d4eFgRQiAtLa3J6XRahBBYunTp0ZqamuzxzvZ1+TZv3rxp165dvxj92ImWz+v1xjQ3\nN6cIIdDX1zclISHh89bW1rmyPH9fl0+W508IgWvXrk0WQmBwcFCTnp5+8sSJEz962M9fSF8T+Tqh\n8fmRsSNGvTnA4XDklJSUlANASUlJ+ZEjR1YAQHV1dW5hYeHBsLCwQaPR2B4fH3+hqanJMh5r/joL\nFy48ERER4Rs5dz95nE5nutfrnd7X1zfVYrE0AUBxcXGFus94u1M+4M5v8Jho+WJiYrpSUlJaAGDK\nlClfzZ079x8ej0cvy/P3dfkAOZ4/AJg8efJ1ABgYGHj81q1bj0VERPge9vM3IYvIN/38yESgKIpY\nvHjxnxcsWHDqj3/8408BoLu7W6fT6boBQKfTdXd3d+sAoLOzc8bId6dNlNz3m2f0vF6v94R6zr17\n9/48OTn5bzabrUz9pPNEztfe3m5sbm5OTU9Pd8r4/Kn5MjIyTgLyPH/Dw8OTUlJSWnQ6Xfezzz77\n8bx5884+7OdvQhYRRVHEeK9hrHzyySc/bG5uTq2pqVm6b9++/3nixImFI+9XP5D4dftPtJ/FvfJM\nRKtXr/59W1vbzJaWlpTp06d7169fv2u81/Qgvvrqqyl5eXkf7tmzZ93UqVP7Rt4nw/P31VdfTXnh\nhReq9uzZs27KlClfyfT8TZo0abilpSXF7XYbGhsb/8fHH3/87Mj7H8bzNyGLyDf5/MhEMX36dC8A\nfO973/uv559//nBTU5NFp9N1d3V1xQCA1+udHh0d/SXw33O73W6DXq/3jM/Kv7n7yWMwGNx6vd7j\ndrsNI+dDOWd0dPSX6n/OV1555X21xTgR8w0ODobl5eV9WFRUVLlixYojgFzPn5rvpZdeOqDmk+n5\nU4WHh/c+99xz/3769On5D/v5m5BFRJbPj1y/fn1yX1/fVAC4du3ad+rq6qxJSUlncnJyHOXl5SUA\nUF5eXqL+sufk5DjsdvvKgYGBx9va2ma6XC6T2rcMZfebJyYmpmvatGlXnU5nuhBCqaysLFL3CUVe\nr3e6evvw4cPPJyUlnQEmXj4hhGKz2crMZnPra6+9tludl+X5+7p8sjx/ly9f/q7aiuvv73+qvr4+\nKzU1tfmhP3/j/W6CYMfRo0eXJiQkfB4XF3fhnXfeeXO81xPM+M///M+ZycnJLcnJyS3z5s37u5qj\np6cnMjMz8893ekvev/3bv/2vuLi4C7Nnzz5XW1u7ZLwzjB4rV648OH369M6wsLABg8HQ8ac//ekn\nweRR32IYFxd34ec///m7453r6/KVlZW9XFRUVJGUlPTZM88887fc3NwjXV1duomY78SJEz9SFGU4\nOTm5RX27a01NTbYsz9+d8h09enSpLM/fZ599lpSamvppcnJyS1JS0me//e1vXxciuNeT+8k34b8e\nl4iIxs+EbGcREVFoYBEhIqKgsYgQEVHQWESIiChoLCJERBQ0FhEiIgra/wV34MrSPXcgNQAAAABJ\nRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x3738a90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "usercount.hist(bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes.AxesSubplot at 0x7e2ab850>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAD9CAYAAABEB/uZAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9UU3eeN/APmpRip4BBCZrQxgmXIIaCRQnbpz5DJwS0\nY4EWFemMYsvMaXG16DPHop55zuyeXSF0dk61M9o5p6Urg0+NDu0onZUUpDK6U4kFZUqlDMEGISFE\nmwBS5ad8nz/ce4ZlrF4W6L1l3q9zPuf0fnNv8k6CfHq/35DrxxgjAACAyZgjdgAAAPj2QfMAAIBJ\nQ/MAAIBJQ/MAAIBJQ/MAAIBJQ/MAAIBJu2/zOHDgQH5MTEyTXq//7MCBA/lERD6fT2EymaojIyNb\nU1JSqnp7e4P5/YuKivZwHGePiopqqaqqSuHHGxoa4mNiYpo4jrPn5+cf4MeHhob8s7KyjnEcZ09M\nTKy7evXqo9P9JAEAYJoxxr62mpqa9Hq9vmlgYODB0dHRucnJydVtbW3aXbt2vVZcXPwqY4zMZnNB\nQUGBmTFGly9fjo6NjW0cHh6WOxwOjVarbRsbG/NjjNHKlSsv2Gy2BMYYrVmz5lRlZeVqxhgdPHhw\na15e3iHGGFkslqysrCzLvTKhUCgUSvy655lHS0tLlMFgsD344IODc+fOvf29733vj++9915mRUVF\nWk5OTikRUU5OTumJEycyiIhOnjyZnp2dfVQul49oNJr2iIiINpvNZnC73Yv6+/sfTkhIuEBEtHnz\n5t/yx4y/r8zMzPdqamqMM9suAQBgqu7ZPPR6/Wfnzp1b5fP5FLdu3Zp36tSpp51Op9rj8SiVSqWH\niEipVHo8Ho+SiKirq2uxWq128ser1Wqny+VSTRxXqVQul8ulIiJyuVyq8PDwTiIimUw2GhQU1Ofz\n+RQz8WQBAGB6yO51Y1RUVEtBQUFxSkpK1UMPPXQzLi6uce7cubfH7+Pn58f8/Pxm/DtOvonHAACY\njRhjftN9n/ddMH/xxRffqa+vX/HHP/7xe/Pnz++JjIxsVSqVnu7u7jAiIrfbvSg0NPQa0Z0zis7O\nznD+WKfTqVar1U6VSuVyOp3qieP8MR0dHY8QEY2Ojsr6+vqCFAqF725ZGGN+Uqqf//zn/yx2BmSa\nXbmQCZmmu6bWIr7efZvHtWvXQomIOjo6Hnn//fefe/75599NS0urKC0tzSEiKi0tzcnIyDhBRJSW\nllZhsVg2Dg8PP+BwOJbY7XYuISHhQlhYWHdgYOANm81mYIz5lZWVbUpPTz/JH8PfV3l5+Tqj0Vgz\nU092urW3t2vEzjARMgknxVzIJAwyie+e01ZEROvWrSv3er0hcrl85NChQ1uDgoL6du/ebd6wYcPx\nkpKSXI1G0378+PENRETR0dHNGzZsOB4dHd0sk8lGDx06tJWfbjp06NDWLVu2HB4YGAh4+umnT61e\nvdpKRJSbm1uyadOmMo7j7CEhIV6LxbLx67KcPHkynYjoiSee+HjhwoXXp+clAACASWMS+MiXkCIi\nFhiY1uvvHzFYVFS0W+w8jDE6c+ZMktgZkGl25UImZJruuvNrfvrv1++/7lzy7pzBMJozZ8/tf/3X\nwP+7Z8+eIrEzAQBInZ+fH2MzsPaBryeZgtra2iSxM0yETMJJMRcyCYNM4kPzAACAScO0FQDALIZp\nKwAAkAw0jymQ4hwnMgknxVzIJAwyiQ/NAwAAJg1rHgAAsxjWPAAAQDLQPKZAinOcyCScFHMhkzDI\nJD40DwAAmDSseQAAzGJY8wAAAMlA85gCKc5xIpNwUsyFTMIgk/jQPAAAYNKw5gEAMIuJtuZRVFS0\nZ9myZZdjYmKann/++XeHhob8fT6fwmQyVUdGRrampKRU9fb2Bo/fn+M4e1RUVEtVVVUKP97Q0BAf\nExPTxHGcPT8//wA/PjQ05J+VlXWM4z
h7YmJi3dWrVx+d7icJAADT657No729XfPWW2/95OLFi483\nNTXF3L59e67FYtloNpt3m0ym6tbW1kij0VhjNpt3ExE1NzdHHzt2LKu5uTnaarWu3rp16yG+4+Xl\n5b1ZUlKSa7fbObvdzlmt1tVERCUlJbkhISFeu93O7dy58/WCgoLimX/a00OKc5zIJJwUcyGTMMgk\nvns2j8DAwBtyuXzk1q1b80ZHR2W3bt2at3jx4q6Kioq0nJycUiKinJyc0hMnTmQQ3bnGeHZ29lG5\nXD6i0WjaIyIi2mw2m8Htdi/q7+9/OCEh4QIR0ebNm3/LHzP+vjIzM9+rqakxzuxTBgCAqZLd60aF\nQuH76U9/+stHHnmkIyAgYCA1NfVDk8lU7fF4lEql0kNEpFQqPR6PR0lE1NXVtTgxMbGOP16tVjtd\nLpdKLpePqNVqJz+uUqlcLpdLRUTkcrlU4eHhnUREMplsNCgoqM/n8ykUCoXvbxNtIcauzKmp8TcG\nBAQMxMXFNSYlJdUS/bXrf9PbPLEe/9uwnZSUVCulPOO3eVLJI8VtKb5//JhU8kjp56m2tjbp8OHD\nW4iINBpNO82Ue13gvK2tTbt06dLmL7/8MmRkZESWkZHx+7Kysh8FBwf3jN9v/vz5PsYYbdu27VdH\njhz5IT+em5v7dnl5eWZ9fX18cnJyNT9+9uzZVWvXrv2AMUZ6vb7J5XIt5m/TarVtXq9XcbeLuBMx\nNmfO7tHCwsI9Yl9UHoVCob4NdefX/PTf7z2nrerr61c88cQTH4eEhHhlMtnoc8899/758+f/ISws\nrLu7uzuMiMjtdi8KDQ29RnTnjKKzszOcP97pdKrVarVTpVK5nE6neuI4f0xHR8cjRESjo6Oyvr6+\noLufdUjPxP/bkAJkEk6KuZBJGGQS3z2bR1RUVEtdXV3iwMBAAGPM7/Tp08nR0dHNzzzzzAelpaU5\nRESlpaU5GRkZJ4iI0tLSKiwWy8bh4eEHHA7HErvdziUkJFwICwvrDgwMvGGz2QyMMb+ysrJN6enp\nJ/lj+PsqLy9fZzQaa2b6SQMAwBTd79SkuLj41ejo6Mt6vb5p8+bNpcPDw3Kv16swGo2nOY5rNZlM\nVT09PcH8/vv27dur1WrbdDpdi9VqTeXH6+vr4/V6fZNWq23bvn37G/z44OCg//r1649HRETYDQZD\nncPh0HzdqRemrVAoFGpyRTM0bYU/EgQAmMXwxYgSJMU5TmQSToq5kEkYZBIfmgcAAEwapq0AAGYx\nTFsBAIBkoHlMgRTnOJFJOCnmQiZhkEl8aB4AADBpWPMAAJjFsOYBAACSgeYxBVKc40Qm4aSYC5mE\nQSbxoXkAAMCkYc0DAGAWw5oHAABIBprHFEhxjhOZhJNiLmQSBpnEh+YBAACThjUPAIBZDGseAAAg\nGfdtHn/5y190y5cvv8RXUFBQ3xtvvPGKz+dTmEym6sjIyNaUlJSq3t7eYP6YoqKiPRzH2aOiolqq\nqqpS+PGGhob4mJiYJo7j7Pn5+Qf48aGhIf+srKxjHMfZExMT665evfro9D/V6SfFOU5kEk6KuZBJ\nGGQS332bh06n+8ulS5eWX7p0aXlDQ0P8vHnzbj377LO/N5vNu00mU3Vra2uk0WisMZvNu4mImpub\no48dO5bV3NwcbbVaV2/duvUQf8qUl5f3ZklJSa7dbufsdjtntVpXExGVlJTkhoSEeO12O7dz587X\nCwoKimf2aQMAwJRM5pq1H374YcqTTz55jjFGOp2upbu7W8kYI7fbHabT6VoYY1RYWLjHbDYX8Mek\npqZaz58/n9jV1bUoKirqc3786NGjG1966aXf8PvU1dUZGGM0MjIiW7BgwfW7XYcX1zBHoVCoyRXN\n0DXMZZNpNBaLZWN2dvZRIiKPx6NUKpUeIiKlUunxeDxKIqKurq7FiYmJdfwxarXa6XK5VHK5fESt\nVj
v5cZVK5XK5XCoiIpfLpQoPD+8kIpLJZKNBQUF9Pp9PoVAofP89wRZi7Mqcmhp/Y0BAwEBcXFxj\nUlJSLdFfTxmxjW1sY/vvebu2tjbp8OHDW4iINBpNO80UoV1maGjogQULFly/du3aQsYYBQcH94y/\nff78+T7GGG3btu1XR44c+SE/npub+3Z5eXlmfX19fHJycjU/fvbs2VVr1679gDFGer2+yeVyLeZv\n02q1bV6vVzGxe0rtzOPMmTNJYmdAptmVC5mQabqLZujMQ/CnrSorK9fEx8c3LFy48DrRnbON7u7u\nMCIit9u9KDQ09BrRnTOKzs7OcP44p9OpVqvVTpVK5XI6neqJ4/wxHR0djxARjY6Oyvr6+oL+9qwD\nAAAkQ2iXycrKshw+fDiH3961a9dr/NpGUVHR7oKCAjNjjC5fvhwdGxvbODQ09MAXX3yx5Lvf/e6V\nsbExP8YYJSQk2Orq6gxjY2N+a9asOVVZWbmaMUYHDx7c+vLLL7/J2J21kKysLMvduqfUzjxQKBRK\n6kUzdOYhaKevvvrqoZCQkC9v3LjxMD/m9XoVRqPxNMdxrSaTqaqnpyeYv23fvn17tVptm06na7Fa\nran8eH19fbxer2/SarVt27dvf4MfHxwc9F+/fv3xiIgIu8FgqHM4HJq7vQBoHigUCjW5ErV5SKGk\n2DykOMeJTN/uXMiETNNdM9U88BfmAAAwafhuKwCAWQzfbQUAAJKB5jEF/B/mSAkyCSfFXMgkDDKJ\nD80DAAAmDWseAACzGNY8AABAMtA8pkCKc5zIJJwUcyGTMMgkPjQPAACYNKx5AADMYljzAAAAyUDz\nmAIpznEik3BSzIVMwiCT+NA8AABg0rDmAQAwi2HNAwAAJAPNYwqkOMeJTMJJMRcyCYNM4hPUPHp7\ne4PXrVtXvnTp0s+jo6ObbTabwefzKUwmU3VkZGRrSkpKVW9vbzC/f1FR0R6O4+xRUVEtVVVVKfx4\nQ0NDfExMTBPHcfb8/PwD/PjQ0JB/VlbWMY7j7ImJiXVXr159dHqfJgAATCshV4zavHlzaUlJyYuM\nMRoZGZH19vYG7dq167Xi4uJXGWNkNpsLJl7DfHh4WO5wODRarbaNv4b5ypUrL9hstgTGGE28hnle\nXt4hxhhZLJYsXMMchUKhpqdIrMvQ9vb2Bi1ZsuSLieM6na6lu7tbyRgjt9sdptPpWhhjVFhYuMds\nNhfw+6WmplrPnz+f2NXVtSgqKupzfvzo0aMbX3rppd/w+9TV1RkYu9OcFixYcP1uLwCaBwqFQk2u\nZqp5yO53ZuJwOJYsXLjw+gsvvPDvf/7zn2Pj4+Mb9u/fv8Pj8SiVSqWHiEipVHo8Ho+SiKirq2tx\nYmJiHX+8Wq12ulwulVwuH1Gr1U5+XKVSuVwul4qIyOVyqcLDwzuJiGQy2WhQUFCfz+dTKBQK339P\ns4UYuzKnpsbfGBAQMBAXF9eYlJRUS/TX+cZvcruxsTFux44d+8V6/Ltt82NSyTM+i1Ty8Nt4/769\n79/+/ft3iP3vf+K2VH6eamtrkw4fPryFiEij0bTTTLlfd/nkk09WyGSykQsXLqxkjFF+fv7+n/3s\nZ/8SHBzcM36/+fPn+xhjtG3btl8dOXLkh/x4bm7u2+Xl5Zn19fXxycnJ1fz42bNnV61du/YDxhjp\n9foml8u1mL9Nq9W2eb1excTuKbUzDyle8B6Zvt25kAmZprtohs487rtgrlarnWq12rly5cpPiIjW\nrVtXfvHixcfDwsK6u7u7w4iI3G73otDQ0GtEd84oOjs7w/njnU6nWq1WO1UqlcvpdKonjvPHdHR0\nPEJENDo6Kuvr6wv627MO6eG7vpQgk3BSzIVMwiCT+O7bPMLCwrrDw8M7W1tbI4mITp8+nbxs2bLL\nzzzzzAelpaU5RESlpaU5GRkZJ4iI0tLSKiwWy8bh4eEHHA7HErvd
ziUkJFwICwvrDgwMvGGz2QyM\nMb+ysrJN6enpJ/lj+PsqLy9fZzQaa2buKQMAwJQJOT1pbGyMXbFixSePPfbYn5999tn3e3t7g7xe\nr8JoNJ7mOK7VZDJV9fT0BPP779u3b69Wq23T6XQtVqs1lR+vr6+P1+v1TVqttm379u1v8OODg4P+\n69evPx4REWE3GAx1DodDc7dTL0xbIdNsz4VMyDTdRWItmBMRxcbG/vmTTz5ZOXH89OnTyXfbf+/e\nvYV79+4tnDgeHx/f0NTUFDNx3N/ff+j48eMbhGQBAADx4butAABmMXy3FQAASAaaxxSM//y7VCCT\ncFLMhUzCIJP40DwAAGDSsOYBADCLYc0DAAAkA81jCqQ4x4lMwkkxFzIJg0ziQ/MAAIBJw5oHAMAs\nhjUPAACQDDSPKZDiHCcyCSfFXMgkDDKJD80DAAAmDWseAACzGNY8AABAMtA8pkCKc5zIJJwUcyGT\nMMgkPkHNQ6PRtD/22GOfLl++/FJCQsIFIiKfz6cwmUzVkZGRrSkpKVW9vb3B/P5FRUV7OI6zR0VF\ntVRVVaXw4w0NDfExMTFNHMfZ8/PzD/DjQ0ND/llZWcc4jrMnJibWXb169dHpfJIAADDNhFwxSqPR\nOLxer2L82K5du14rLi5+lTFGZrO5oKCgwMwYo8uXL0fHxsY2Dg8Pyx0Oh0ar1baNjY35McZo5cqV\nF2w2WwJjjNasWXOqsrJyNWOMDh48uDUvL+8QY4wsFktWVlaW5W5Xw5LalQRRKBRK6kUzdCVBwdNW\nbMKCS0VFRVpOTk4pEVFOTk7piRMnMoiITp48mZ6dnX1ULpePaDSa9oiIiDabzWZwu92L+vv7H+bP\nXDZv3vxb/pjx95WZmfleTU2NcToaIwAAzAxBzcPPz48lJyefXrFiRf1bb731EyIij8ejVCqVHiIi\npVLp8Xg8SiKirq6uxWq12skfq1arnS6XSzVxXKVSuVwul4qIyOVyqcLDwzuJiGQy2WhQUFCfz+dT\nTN/TnBlSnONEJuGkmAuZhEEm8Qm6hvmf/vSn/7Vo0SL39evXF5pMpuqoqKiW8bf7+fmxOx+lnWlb\niLErc2pq/I0BAQEDcXFxjUlJSbVEf33jvsntxsbGODEf/27bPKnkkfI23r9v73ZjY2OclPJI6eep\ntrY26fDhw1uI7qxX00yZ7DzXP/3TP/383/7t336q0+la3G53GGOMurq6Ful0uhbGGBUVFe0uKira\nze+fmppqraurM7jd7rCoqKjP+fF33303++WXX36T3+f8+fOJjDEaGRmRLViw4Prd5u2w5oFCoVCT\nKxJrzePWrVvz+vv7HyYiunnz5kNVVVUpMTExTWlpaRWlpaU5RESlpaU5GRkZJ4iI0tLSKiwWy8bh\n4eEHHA7HErvdziUkJFwICwvrDgwMvGGz2QyMMb+ysrJN6enpJ/lj+PsqLy9fZzQaa2amVQIAwLS4\nX3f54osvlsTGxjbGxsY2Llu27DP+//q9Xq/CaDSe5jiu1WQyVfX09ATzx+zbt2+vVqtt0+l0LVar\nNZUfr6+vj9fr9U1arbZt+/btb/Djg4OD/uvXrz8eERFhNxgMdQ6HQ3O37im1M48zZ84kiZ0BmWZX\nLmRCpukumqEzj/uueSxZssTBzy+Op1AofKdPn06+2zF79+4t3Lt3b+HE8fj4+IampqaYieP+/v5D\nx48f3yCk2QEAgPjw3VYAALMYvtsKAAAkA81jCiZ+vFIKkEk4KeZCJmGQSXxoHgAAMGlY8wAAmMWw\n5gEAAJKB5jEFUpzjRCbhpJgLmYRBJvGheQAAwKRhzQMAYBbDmgcAAEgGmscUSHGOE5mEk2IuZBIG\nmcSH5gEAAJOGNQ8AgFkMax4AACAZaB5TIMU5TmQSToq5kEkYZBIfmgcAAEyekCtGjY6Ozo2Li7u0\ndu3aDxi7cxXB5OTk6rtdRbCw
sHBPRESEXafTtXz44Ycp/Dh/FcGIiAj7K6+8coAfHxwc9N+wYcMx\n/iqC7e3tj37d1bCkdiVBFAqFknqRWNcwJyI6cOBAfnR0dPOdRWsis9m822QyVbe2tkYajcYas9m8\nm4ioubk5+tixY1nNzc3RVqt19datWw+x/1qoycvLe7OkpCTXbrdzdruds1qtq4mISkpKckNCQrx2\nu53buXPn6wUFBcUz0SQBAGD63Ld5OJ1O9alTp57+8Y9//DbfCCoqKtJycnJKiYhycnJKT5w4kUFE\ndPLkyfTs7Oyjcrl8RKPRtEdERLTZbDaD2+1e1N/f/3BCQsIFIqLNmzf/lj9m/H1lZma+V1NTY5yp\nJzvdpDjHiUzCSTEXMgmDTOK7b/PYuXPn67/4xS92zZkzZ4wf83g8SqVS6SEiUiqVHo/HoyQi6urq\nWqxWq538fmq12ulyuVQTx1UqlcvlcqmIiFwulyo8PLyTiEgmk40GBQX1+Xw+xfQ9RQAAmG6ye934\nhz/8YW1oaOi15cuXX/q6rurn58f46ayZt4UYuzKnpsbfGBAQMBAXF9eYlJRUS/TXrv9Nb/PEevxv\nw3ZSUlKtlPKM3+ZJJY8Ut6X4/vFjUskjpZ+n2trapMOHD28hItJoNO00U+61ILJnz55CtVrdqdFo\nHGFhYe558+bd/NGPflSm0+la3G53GGOMurq6Ful0uhbGGBUVFe0uKirazR+fmppqraurM7jd7rCo\nqKjP+fF33303++WXX36T3+f8+fOJjDEaGRmRLViw4PrXLfpgwRyFQqEmVyTGgnlhYeHezs7OcIfD\nscRisWz8/ve//1FZWdmmtLS0itLS0hwiotLS0pyMjIwTRERpaWkVFotl4/Dw8AMOh2OJ3W7nEhIS\nLoSFhXUHBgbesNlsBsaYX1lZ2ab09PST/DH8fZWXl68zGo01M9Ypp9nE/9uQAmQSToq5kEkYZBLf\nPaetJuKnp3bv3m3esGHD8ZKSklyNRtN+/PjxDURE0dHRzRs2bDgeHR3dLJPJRg8dOrSVP+bQoUNb\nt2zZcnhgYCDg6aefPrV69WorEVFubm7Jpk2byjiOs4eEhHgtFsvG6X6SAAAwvfDdVgAAsxi+2woA\nACQDzWMKpDjHiUzCSTEXMgmDTOJD8wAAgEnDmgcAwCyGNQ8AAJAMNI8pkOIcJzIJJ8VcyCQMMokP\nzQMAACYNax4AALMY1jwAAEAy0DymQIpznMgknBRzIZMwyCQ+NA8AAJg0rHkAAMxiWPMAAADJQPOY\nAinOcSKTcFLMhUzCIJP40DwAAGDSsOYBADCLibLmMTg4+KDBYLDFxcU1RkdHN/O/sH0+n8JkMlVH\nRka2pqSkVPX29gbzxxQVFe3hOM4eFRXVUlVVlcKPNzQ0xMfExDRxHGfPz88/wI8PDQ35Z2VlHeM4\nzp6YmFh39erVR6f7SQIAwPS6Z/N48MEHB8+cOfNUY2Nj3KeffvrYmTNnnvrP//zPJ81m826TyVTd\n2toaaTQaa8xm824ioubm5uhjx45lNTc3R1ut1tVbt249xHe8vLy8N0tKSnLtdjtnt9s5q9W6moio\npKQkNyQkxGu327mdO3e+XlBQUDzzT3t6SHGOE5mEk2IuZBIGmcR33zWPefPm3SIiGh4efuD27dtz\n58+f31NRUZGWk5NTSkSUk5NTeuLEiQwiopMnT6ZnZ2cflcvlIxqNpj0iIqLNZrMZ3G73ov7+/ocT\nEhIuEBFt3rz5t/wx4+8rMzPzvZqaGuNMPVkAAJgesvvtMDY2Nufxxx+/eOXKFW1eXt6by5Ytu+zx\neJRKpdJDRKRUKj0ej0dJRNTV1bU4MTGxjj9WrVY7XS6XSi6Xj6jVaic/rlKpXC6XS0VE5HK5VOHh\n4Z1ERDKZbDQoKKjP5/MpFAqF72/TbCHGrsypqfE3BgQEDMTFxTUmJSXVEv2163/T2zyxHv/bsJ
2U\nlFQrpTzjt3lSySPFbSm+f/yYVPJI6eeptrY26fDhw1uIiDQaTTvNFMaYoOrt7Q0yGAx1H3300VPB\nwcE942+bP3++jzFG27Zt+9WRI0d+yI/n5ua+XV5enllfXx+fnJxczY+fPXt21dq1az9gjJFer29y\nuVyL+du0Wm2b1+tVTHx8ImJEjM2Zs3u0sLBwj9DcKBQK9fdcd37NT//9Cv6oblBQUN8PfvCD/2ho\naIhXKpWe7u7uMCIit9u9KDQ09BrRnTOKzs7OcP4Yp9OpVqvVTpVK5XI6neqJ4/wxHR0djxARjY6O\nyvr6+oLuftYhPRP/b0MKkEk4KeZCJmGQSXz3bB5ffvnlAv6TVAMDAwHV1dWm5cuXX0pLS6soLS3N\nISIqLS3NycjIOEFElJaWVmGxWDYODw8/4HA4ltjtdi4hIeFCWFhYd2Bg4A2bzWZgjPmVlZVtSk9P\nP8kfw99XeXn5OqPRWDOzTxkAAKbsXqcln376aczy5csvxsbGNsbExHz62muv7WKMkdfrVRiNxtMc\nx7WaTKaqnp6eYP6Yffv27dVqtW06na7FarWm8uP19fXxer2+SavVtm3fvv0NfnxwcNB//fr1xyMi\nIuwGg6HO4XBovu7UC9NWKBQKNbmiGZq2wh8JAgDMYvhiRAmS4hwnMgknxVzIJAwyiQ/NAwAAJg3T\nVgAAsximrQAAQDLQPKZAinOcyCScFHMhkzDIJD40DwAAmDSseQAAzGJY8wAAAMlA85gCKc5xIpNw\nUsyFTMIgk/jQPAAAYNKw5gEAMIthzQMAACQDzWMKpDjHiUzCSTEXMgmDTOJD8wAAgEnDmgcAwCyG\nNQ8AAJCM+zaPzs7O8KeeeurMsmXLLuv1+s/eeOONV4iIfD6fwmQyVUdGRrampKRU8ZerJSIqKira\nw3GcPSoqqqWqqiqFH29oaIiPiYlp4jjOnp+ff4AfHxoa8s/KyjrGcZw9MTGx7urVq49O9xOdCVKc\n40Qm4aSYC5mEQSbx3bd5yOXykddff33n5cuXl9XV1SUePHjwHz///POlZrN5t8lkqm5tbY00Go01\nZrN5NxFRc3Nz9LFjx7Kam5ujrVbr6q1btx7iT5ny8vLeLCkpybXb7ZzdbuesVutqIqKSkpLckJAQ\nr91u53bu3Pl6QUFB8cw+bQAAmJLJXrc2PT39RHV1dbJOp2vp7u5WMsbI7XaH6XS6FsYYFRYW7jGb\nzQX8/qmpqdbz588ndnV1LYqKivqcHz969OjGl1566Tf8PnV1dQbGGI2MjMgWLFhw/W7X4cU1zFEo\nFGpyRTN1lE64AAAQ7UlEQVR0DXPZZBpNe3u75tKlS8sNBoPN4/EolUqlh4hIqVR6PB6Pkoioq6tr\ncWJiYh1/jFqtdrpcLpVcLh9Rq9VOflylUrlcLpeKiMjlcqnCw8M7iYhkMtloUFBQn8/nUygUCt9/\nT7CFGLsyp6bG3xgQEDAQFxfXmJSUVEv011NGbGMb29j+e96ura1NOnz48BYiIo1G004zRWiX6e/v\n/87jjz/e8Pvf/z6DMUbBwcE942+fP3++jzFG27Zt+9WRI0d+yI/n5ua+XV5enllfXx+fnJxczY+f\nPXt21dq1az9gjJFer29yuVyL+du0Wm2b1+tVTOyeUjvzOHPmTJLYGZBpduVCJmSa7qIZOvMQ9Gmr\nkZEReWZm5nubNm0qy8jIOEF052yju7s7jIjI7XYvCg0NvUZ054yis7MznD/W6XSq1Wq1U6VSuZxO\np3riOH9MR0fHI0REo6Ojsr6+vqC/PesAAADJuF93GRsb89u0adNvd+zY8fr48V27dr3Gr20UFRXt\nLigoMDPG6PLly9GxsbGNQ0NDD3zxxRdLvvvd714ZGxvzY4xRQkKCra6uzjA2Nua3Zs2aU5WVlasZ\nY3Tw4MGtL7/88puM3VkLycrKstyte0rtzAOFQqGkXjRDZx
733eHcuXNP+vn5jcXGxjbGxcVdiouL\nu1RZWbna6/UqjEbjaY7jWk0mU1VPT08wf8y+ffv2arXaNp1O12K1WlP58fr6+ni9Xt+k1Wrbtm/f\n/gY/Pjg46L9+/frjERERdoPBUOdwODR3ewHQPFAoFGpyJVrzkEpJsXlIcY4Tmb7duZAJmaa7Zqp5\n4C/MAQBg0vDdVgAAsxi+2woAACQDzWMK+D/MkRJkEk6KuZBJGGQSH5oHAABMGtY8AABmMax5AACA\nZKB5TIEU5ziRSTgp5kImYZBJfGgeAAAwaVjzAACYxbDmAQAAkoHmMQVSnONEJuGkmAuZhEEm8aF5\nAADApGHNAwBgFsOaBwAASAaaxxRIcY4TmYSTYi5kEgaZxHff5vHiiy++o1QqPTExMU38mM/nU5hM\npurIyMjWlJSUqt7e3mD+tqKioj0cx9mjoqJaqqqqUvjxhoaG+JiYmCaO4+z5+fkH+PGhoSH/rKys\nYxzH2RMTE+uuXr366HQ+QQAAmAH3u1rU2bNnV128eHG5Xq9v4sd27dr1WnFx8auMMTKbzQUTr18+\nPDwsdzgcGq1W28Zfv3zlypUXbDZbAmOMJl6/PC8v7xBjjCwWS9bdrl/OXw1LalcSRKFQKKkXiXUl\nwVWrVp2bP39+z/ixioqKtJycnFIiopycnNITJ05kEBGdPHkyPTs7+6hcLh/RaDTtERERbTabzeB2\nuxf19/c/nJCQcIGIaPPmzb/ljxl/X5mZme/V1NQYp7M5AgDA9JP9Tw7yeDxKpVLpISJSKpUej8ej\nJCLq6upanJiYWMfvp1arnS6XSyWXy0fUarWTH1epVC6Xy6UiInK5XKrw8PBOIiKZTDYaFBTU5/P5\nFAqFwve3j7yFGLsyp6bG3xgQEDAQFxfXmJSUVEv01/nGb3K7sbExbseOHfvFevy7bfNjUskzPotU\n8vDbeP++ve/f/v37d4j973/itlR+nmpra5MOHz68hYhIo9G000wRcnricDg046etgoODe8bfPn/+\nfB9jjLZt2/arI0eO/JAfz83Nfbu8vDyzvr4+Pjk5uZofP3v27Kq1a9d+wBgjvV7f5HK5FvO3abXa\nNq/Xq7jbqZfUpq2keMF7ZPp250ImZJruIrGmre5GqVR6uru7w4iI3G73otDQ0GtEd84oOjs7w/n9\nnE6nWq1WO1UqlcvpdKonjvPHdHR0PEJENDo6Kuvr6wu6+1mH9PBdX0qQSTgp5kImYZBJfP+j5pGW\nllZRWlqaQ0RUWlqak5GRcYIft1gsG4eHhx9wOBxL7HY7l5CQcCEsLKw7MDDwhs1mMzDG/MrKyjal\np6efnHhf5eXl64xGY810PTkAAJgh9zs12bhx49FFixZ1yeXyYbVa3fnOO++84PV6FUaj8TTHca0m\nk6mqp6cnmN9/3759e7VabZtOp2uxWq2p/Hh9fX28Xq9v0mq1bdu3b3+DHx8cHPRfv3798YiICLvB\nYKhzOByarzv1wrQVMs32XMiETNNdNEPTVvddMD969Gj23cZPnz6dfLfxvXv3Fu7du7dw4nh8fHxD\nU1NTzMRxf3//oePHj2+4Xw4AAJAOfLcVAMAshu+2AgAAyUDzmILxn3+XCmQSToq5kEkYZBIfmgcA\nAEwa1jwAAGYxrHkAAIBkfCubxz//876f+/n5MT8/PxYYqLghVg4pznEik3BSzIVMwiCT+P5HX4wo\ntqGhm/5Ed6bb+vv9HhY5DgDA351v5ZrH2Jh5Lt88iPxoJubzAABmA6x5AACAZKB5TIEU5ziRSTgp\n5kImYZBJfGgeAAAwaVjzAACYxbDmAQAAkjELmoeMxPqbDynOcSKTcFLMhUzCIJP4JNM8rFbr6qio\nqBaO4+zFxcUFwo8cpTtTWIz6+3u+0b/5aGxsjPsmH08IZBJOirmQSRhkEp8kmsft27fnbtu27ddW\nq3V1c3Nz9NGjR7M///
zzpWLnup/e3t5gsTNMhEzCSTEXMgmDTOKTRPO4cOFCQkRERJtGo2mXy+Uj\nGzdutJw8eTJ98vck3hQWAMDfE0k0D5fLpQoPD+/kt9VqtdPlcqkm7hcU9EyfXP67ka+/p/FTWP0P\n843Ez+8Bdrf/nmqDaW9v10zl+JmATMJJMRcyCYNMEiD2xdkZY1ReXp754x//+C1+u6ys7Efbtm37\n1cSLuKNQKBRq8jUTv7cl8cWIKpXK1dnZGc5vd3Z2hqvVauf4ffC3HAAA0iGJaasVK1bU2+12rr29\nXTM8PPzAsWPHstLS0irEzgUAAHcniTMPmUw2+utf/3pbamrqh7dv356bm5tbsnTp0s/FzgUAAF9D\n7PWO+1VlZeVqnU7XEhERYTebzQXTff8vvPDCO6GhoR69Xt/Ej3m9XkVycnI1x3GtJpOpqqenJ5i/\nrbCwcE9ERIRdp9O1fPjhhyn8eH19fbxer2+KiIiwv/LKKwf48cHBQf8NGzYci4iIsBsMhrr29vZH\n75epo6MjPCkp6Ux0dPTlZcuWfXbgwIFXxM41MDDwYEJCgi02NrZx6dKlzbt37y4SOxNfo6Ojc+Pi\n4i6tXbv2A6lkevTRR9tjYmI+jYuLu7Ry5coLUsjV09MTnJmZWR4VFfX50qVLm+vq6gxiZmppadHF\nxcVd4iswMLDvwIEDr4j9OhUWFu6Jjo6+rNfrm7Kzs98dHBz0FzvT/v378/V6fdOyZcs+279/f74U\nfp6m9RfxdNfo6OhcrVbb5nA4NMPDw/LY2NjG5ubmpdP5GGfPnl118eLF5eObx65du14rLi5+lTFG\nZrO5oKCgwMwYo8uXL0fHxsY2Dg8Pyx0Oh0ar1baNjY35McZo5cqVF2w2WwJjjNasWXOqsrJyNWOM\nDh48uDUvL+8QY4wsFktWVlaW5X6Z3G532KVLl+IYY9Tf3/+dyMjIvzQ3Ny8VO9fNmzfnMcZoZGRE\nZjAY6s6dO/ek2JkYY/TLX/7y/zz//PP/75lnnqmQwvvHGCONRuPwer2K8WNi59q8eXNpSUnJi/x7\n2NvbGyR2Jr5u3749JywszN3R0REuZiaHw6FZsmTJF4ODg/6MMdqwYcOxw4cP54iZqampSa/X65sG\nBgYeHB0dnZucnFzd1tamFfu9E71B3Ks+/vjjf0hNTbXy20VFRbuLiop2T/fjOBwOzfjmodPpWrq7\nu5WM3flFrtPpWhi7083Hn/2kpqZaz58/n9jV1bUoKirqc3786NGjG1966aXf8PvU1dUZ+H+wCxYs\nuD7ZfOnp6Seqq6uTpZLr5s2b81asWPHJZ599tkzsTJ2dnWqj0Xj6o48+eoo/8xA7E2N3mseXX34Z\nMn5MzFy9vb1BS5Ys+WLiuBReK8YYffjhhylPPvnkObEzeb1eRWRk5F98Pt/8kZER2dq1az+oqqoy\niZnpd7/73brc3Ny3+e1/+Zd/+VlxcfGrYr93klgw/zpC//5junk8HqVSqfQQESmVSo/H41ESEXV1\ndS0e/ykwPs/EcZVK5eJzjn8OMplsNCgoqM/n8ymEZmlvb9dcunRpucFgsImda2xsbE5cXFyjUqn0\nPPXUU2eWLVt2WexMO3fufP0Xv/jFrjlz5ozxY2JnIrrzTabJycmnV6xYUf/WW2/9ROxcDodjycKF\nC6+/8MIL//74449f/MlPfvLWzZs3H5LCa0VEZLFYNmZnZx8V+3VSKBS+n/70p7985JFHOhYvXtwV\nHBzcazKZqsXMpNfrPzt37twqn8+nuHXr1rxTp0497XQ61WK/d5JuHne+hl38DGLl+Oqrr76TmZn5\n3oEDB/IffvjhfrFzzZkzZ6yxsTHO6XSqz549+7/PnDnzlJiZ/vCHP6wNDQ29tnz58kvsaz7KLdb7\n96c//el/Xbp0aXllZeWagwcP/uO5c+dWiZlrdHRUdvHixce3bt166OLFi48/9NBDN81m
824xM/GG\nh4cf+OCDD55Zv3797ybe9k1nunLlinb//v072tvbNV1dXYu/+uqr7xw5cuRHYmaKiopqKSgoKE5J\nSalas2ZNZVxcXOPcuXNvi5mJSOLNQ8jff8wEpVLp6e7uDiMicrvdi0JDQ6/dLY/T6VSr1WqnSqVy\nOZ1O9cRx/piOjo5HiO78A+7r6wtSKBS++2UYGRmRZ2Zmvrdp06ayjIyME1LJRUQUFBTU94Mf/OA/\nGhoa4sXM9PHHHz9RUVGRtmTJEkd2dvbRjz766PubNm0qk8LrtGjRIjcR0cKFC68/++yzv79w4UKC\nmLnUarVTrVY7V65c+QkR0bp168ovXrz4eFhYWLfYr1VlZeWa+Pj4hoULF14nEvfnvL6+fsUTTzzx\ncUhIiFcmk40+99xz758/f/4fxH6dXnzxxXfq6+tX/PGPf/ze/PnzeyIjI1vF/jmXdPMQ6+8/0tLS\nKkpLS3OIiEpLS3P4X95paWkVFotl4/Dw8AMOh2OJ3W7nEhISLoSFhXUHBgbesNlsBsaYX1lZ2ab0\n9PSTE++rvLx8ndForLnf4zPG/HJzc0uio6Obd+zYsV8Kub788ssF/Be/DQwMBFRXV5uWL19+ScxM\nhYWFezs7O8MdDscSi8Wy8fvf//5HZWVlm8R+/27dujWvv7//YSKimzdvPlRVVZUSExPTJGausLCw\n7vDw8M7W1tZIIqLTp08nL1u27PIzzzzzgZivFRHR0aNHs/kpq4n3801nioqKaqmrq0scGBgIYIz5\nnT59Ojk6OrpZ7Nfp2rVroUREHR0dj7z//vvPPf/88++K/XM+rQvPM1GnTp1aExkZ+RetVttWWFi4\nZ7rvf+PGjUcXLVrUJZfLh9Vqdec777zzgtfrVRiNxtN3+wjcvn379mq12jadTtditVpT+XH+I3Ba\nrbZt+/btb/Djg4OD/uvXrz/OfwTO4XBo7pfp3LlzT/r5+Y3FxsY28h9jrKysXC1mrk8//TRm+fLl\nF2NjYxtjYmI+fe2113ax/1pgFPO14qu2tvZ7/KetxM70xRdfLImNjW2MjY1tXLZs2Wf8z63YuRob\nG2NXrFjxyWOPPfbnZ5999v3e3t4gsTN99dVXD4WEhHx548aNh/kxsTMVFxe/yn9Ud/PmzaXDw8Ny\nsTOtWrXqbHR09OXY2NjGjz766CkpvE7fmsvQAgCAdEh62goAAKQJzQMAACYNzQMAACYNzQMAACYN\nzQMAACYNzQMAACbt/wPWUJ2k/VQgowAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x780c5f90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Histogram of the values held in `songcount`, bucketed into 100 bins.\n",
    "songcount.hist(bins=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Reorder `songcount` in place, largest value first; the top-50 listing further\n",
    "# down reads its leading entries.\n",
    "# NOTE(review): presumably `songcount` is a pandas Series here. The in-place\n",
    "# `Series.sort` is an old pandas API that later releases replaced with\n",
    "# `sort_values` -- confirm which pandas version this notebook targets.\n",
    "songcount.sort(ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_song_info_from_sid(conn, sid):\n",
    "    \"\"\"Look up the title and artist name stored for a song id.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    conn : sqlite3.Connection\n",
    "        Open connection to a database with a `songs` table that has\n",
    "        `song_id`, `title` and `artist_name` columns.\n",
    "    sid : str\n",
    "        Song id to look up.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        `(title, artist_name)` of the first matching row.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    TypeError\n",
    "        If no row matches: `fetchone()` then returns None, which cannot be unpacked.\n",
    "    \"\"\"\n",
    "    cur = conn.cursor()\n",
    "    # Bind `sid` as a query parameter instead of formatting it into the SQL text,\n",
    "    # so an id containing a quote can neither break nor alter the statement.\n",
    "    cur.execute(\"SELECT title, artist_name FROM songs WHERE song_id = ?\", (sid,))\n",
    "    title, artist = cur.fetchone()\n",
    "    return title, artist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sehr kosmisch BY Harmonia -- count: 80277\n",
      "Dog Days Are Over (Radio Edit) BY Florence + The Machine -- count: 71975\n",
      "Undo BY Björk -- count: 63486\n",
      "Secrets BY OneRepublic -- count: 60764\n",
      "You're The One BY Dwight Yoakam -- count: 60062\n",
      "Revelry BY Kings Of Leon -- count: 59131\n",
      "Fireflies BY Charttraxx Karaoke -- count: 50561\n",
      "Horn Concerto No. 4 in E flat K495: II. Romance (Andante cantabile) BY Barry Tuckwell/Academy of St Martin-in-the-Fields/Sir Neville Marriner -- count: 50029\n",
      "Hey_ Soul Sister BY Train -- count: 50013\n",
      "Tive Sim BY Cartola -- count: 44583\n",
      "OMG BY Usher featuring will.i.am -- count: 41360\n",
      "Drop The World BY Lil Wayne / Eminem -- count: 39214\n",
      "The Scientist BY Coldplay -- count: 38856\n",
      "Canada BY Five Iron Frenzy -- count: 37666\n",
      "Clocks BY Coldplay -- count: 36879\n",
      "Marry Me BY Train -- count: 36732\n",
      "Catch You Baby (Steve Pitron & Max Sanna Radio Edit) BY Lonnie Gordon -- count: 35097\n",
      "Pursuit Of Happiness (nightmare) BY Kid Cudi / MGMT / Ratatat -- count: 34032\n",
      "Lucky (Album Version) BY Jason Mraz & Colbie Caillat -- count: 33137\n",
      "Bulletproof BY La Roux -- count: 32884\n",
      "Alejandro BY Lady GaGa -- count: 32345\n",
      "Creep (Explicit) BY Radiohead -- count: 32231\n",
      "Just Dance BY Lady GaGa / Colby O'Donis -- count: 31958\n",
      "Billionaire [feat. Bruno Mars]  (Explicit Album Version) BY Travie McCoy -- count: 31932\n",
      "Sincerité Et Jalousie BY Alliance Ethnik -- count: 31547\n",
      "Représente BY Alliance Ethnik -- count: 30885\n",
      "The Only Exception (Album Version) BY Paramore -- count: 29614\n",
      "Invalid BY Tub Ring -- count: 28354\n",
      "Bleed It Out [Live At Milton Keynes] BY Linkin Park -- count: 28197\n",
      "I Gotta Feeling BY Black Eyed Peas -- count: 28048\n",
      "Ain't Misbehavin BY Sam Cooke -- count: 27195\n",
      "Heartbreak Warfare BY John Mayer -- count: 26389\n",
      "When You Were Young BY The Killers -- count: 26378\n",
      "Fix You BY Coldplay -- count: 26352\n",
      "Livin' On A Prayer BY Bon Jovi -- count: 26347\n",
      "The Gift BY Angels and Airwaves -- count: 25823\n",
      "Float On BY Modest Mouse -- count: 25196\n",
      "Cosmic Love BY Florence + The Machine -- count: 25167\n",
      "Halo BY Beyoncé -- count: 24744\n",
      "Kryptonite BY 3 Doors Down -- count: 24716\n",
      "Uprising BY Muse -- count: 24605\n",
      "Party In The U.S.A. BY Miley Cyrus -- count: 24448\n",
      "Sample Track 2 BY Simon Harris -- count: 24185\n",
      "I CAN'T GET STARTED BY Ron Carter -- count: 24104\n",
      "Bitter Sweet Symphony BY The Verve -- count: 23469\n",
      "You've Got The Love BY Florence + The Machine -- count: 22879\n",
      "Home BY Edward Sharpe & The Magnetic Zeros -- count: 22751\n",
      "Sexy Bitch BY DJ Dizzy -- count: 22623\n",
      "I Kissed A Girl BY Katy Perry -- count: 22605\n",
      "Electric Feel BY MGMT -- count: 22398\n"
     ]
    }
   ],
   "source": [
    "# Take a look at the 50 most listened songs: the leading entries of `songcount`,\n",
    "# which an earlier cell sorted in descending order.\n",
    "n_top = 50\n",
    "# `with sqlite3.connect(...)` only scopes a transaction and leaves the connection\n",
    "# open, so open it explicitly and close it once the lookups are done.\n",
    "conn = sqlite3.connect(os.path.join(MSD_ADD, md_dbfile))\n",
    "try:\n",
    "    # Positional slice: explicit about taking the first rows, and it does not\n",
    "    # fail when fewer than `n_top` songs are available.\n",
    "    top_songs = songcount.iloc[:n_top]\n",
    "    for sid, count in zip(top_songs.index, top_songs.values):\n",
    "        title, artist = get_song_info_from_sid(conn, sid)\n",
    "        # Single parenthesised argument: prints the same on Python 2 and Python 3.\n",
    "        print(\"%s BY %s -- count: %d\" % (title, artist, count))\n",
    "finally:\n",
    "    conn.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate in- and out-of-matrix split"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get all users & songs in the filtered taste profile, shuffle the songs, and map both to integer indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only the song id and play count columns of the taste profile `tp`.\n",
    "playcount = tp[['sid', 'count']]   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Group the play counts by song id; as_index=False keeps `sid` as a regular\n",
    "# column of the aggregated result instead of turning it into the index.\n",
    "playcount_groupbysid = playcount.groupby('sid', as_index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                      sid   count\n",
      "6147   SOBONKR12A58A7A7E0  530291\n",
      "3174   SOAUWYT12A81C206F1  462084\n",
      "72395  SOSXLTC12AF72A7F54  381805\n",
      "22489  SOFRQTD12A81C233C0  318278\n",
      "3524   SOAXGDH12A8C13F8A1  293453\n",
      "16885  SOEGIYH12A6D4FC0E3  287323\n",
      "54300  SONYKOW12AB01849C9  232204\n",
      "61043  SOPUCYA12A8C13A694  231973\n",
      "77055  SOUFTBI12AB0183F65  203117\n",
      "55382  SOOFYTN12A6D4F9B35  186374\n",
      "30671  SOHTKMO12AB01843B0  183617\n",
      "80275  SOVDSJC12A58A7A271  177617\n",
      "6192   SOBOUPA12A6D4F81F1  175260\n",
      "13465  SODJWHY12A8C142CCE  166712\n",
      "44144  SOLFXKT12AB017E3E0  157726\n",
      "73078  SOTCMDJ12A6D4F8528  155002\n",
      "21564  SOFLJQZ12A6D4FADA6  146176\n",
      "75813  SOTWNDJ12A8C143984  136096\n",
      "78205  SOUNZHU12A8AE47481  123716\n",
      "78843  SOUSMXX12AB0185C24  121482\n",
      "79277  SOUVTSM12AC468F6A7  119187\n",
      "41186  SOKLRPJ12A8C13C3FE  110497\n",
      "66762  SORJICW12A8C13640D  106462\n",
      "6080   SOBOAFP12A8C131F36  106053\n",
      "60813  SOPSOHT12A67AE0235  105932\n",
      "83973  SOWEHOM12A6BD4E09E  103151\n",
      "76755  SOUDLVN12AAFF43658  103042\n",
      "60403  SOPPROJ12AB0184E18  101875\n",
      "11269  SOCVTLJ12A6310F0FD   99453\n",
      "60919  SOPTLQL12AB018D56F   95937\n",
      "...                   ...     ...\n",
      "32101  SOIDATE12A8C131200      53\n",
      "74223  SOTKYOX12A81C2061A      53\n",
      "40963  SOKKFHW12A67020AFB      53\n",
      "21522  SOFLDYI12A8C13C19F      53\n",
      "10035  SOCNQPD12AB0186432      53\n",
      "32170  SOIDNEP12A8C13C90B      53\n",
      "27029  SOGVISR12A8AE48718      53\n",
      "30890  SOHVCTM12A8C14128B      53\n",
      "24138  SOGCOVN12A58A79189      52\n",
      "19454  SOEXHUE12A6D4F9E3C      52\n",
      "84562  SOWINAL12A8C138F58      52\n",
      "50777  SOMZYTR12A8C13CC7D      52\n",
      "70327  SOSIWXC12A8C1372DF      52\n",
      "60903  SOPTGZW12A6D4F64A0      52\n",
      "89091  SOXPXBM12AB01820E7      51\n",
      "6681   SOBRRRB12A6D4F7328      51\n",
      "70609  SOSKRFC12A6701D619      51\n",
      "42073  SOKSBES12A58A7E021      51\n",
      "42806  SOKWZLI12A6310F139      51\n",
      "73908  SOTINBI12A58A80B04      51\n",
      "48795  SOMMEYQ12A8C131BC5      50\n",
      "1001   SOAGPYR12A6701DE67      50\n",
      "76083  SOTYOJT12AB018718B      50\n",
      "14085  SODOBPL12A8C142066      49\n",
      "58343  SOPATVC12A8C14385E      49\n",
      "48113  SOMHMKS12A81C22AC8      49\n",
      "70379  SOSJELD12AB018A529      49\n",
      "17334  SOEJGMO12A6D4F8333      49\n",
      "27596  SOGZAUQ12AAFF442CE      48\n",
      "31615  SOIAAWS12A58A7B3A0      47\n",
      "\n",
      "[97414 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "# Total play count per song, most played first.\n",
    "# NOTE: this rebinds `songcount`, which earlier cells used for a different,\n",
    "# sid-indexed object; from here on it is a frame with `sid` and `count` columns.\n",
    "# NOTE(review): `DataFrame.sort` is an old pandas API that later releases replaced\n",
    "# with `sort_values` -- confirm which pandas version this notebook targets.\n",
    "songcount = playcount_groupbysid.sum().sort('count', ascending=False)\n",
    "# Parenthesised so the statement is valid on both Python 2 and Python 3.\n",
    "print(songcount)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Distinct song ids, in order of first appearance in `tp`.\n",
    "unique_sid = pd.unique(tp['sid'])\n",
    "n_songs = len(unique_sid)\n",
    "# Shuffle the songs. The seed is fixed so the permutation, and the in-/out-of-matrix\n",
    "# split sliced from it below, is the same on every run.\n",
    "np.random.seed(98765)\n",
    "idx = np.random.permutation(np.arange(n_songs))\n",
    "unique_sid = unique_sid[idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "97414\n"
     ]
    }
   ],
   "source": [
    "# Parenthesised so the statement is valid on both Python 2 and Python 3.\n",
    "print(n_songs)\n",
    "# Distinct user ids, in order of first appearance in `tp` (not shuffled here).\n",
    "unique_uid = pd.unique(tp['uid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Give every song / user id its position in the corresponding id array\n",
    "song2id = {sid: pos for pos, sid in enumerate(unique_sid)}\n",
    "user2id = {uid: pos for pos, uid in enumerate(unique_uid)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Persist the id arrays (one id per line, in index order) and the id -> index maps.\n",
    "with open('unique_uid.txt', 'w') as f:\n",
    "    f.writelines('%s\\n' % uid for uid in unique_uid)\n",
    "\n",
    "with open('unique_sid.txt', 'w') as f:\n",
    "    f.writelines('%s\\n' % sid for sid in unique_sid)\n",
    "\n",
    "with open('song2id.json', 'w') as f:\n",
    "    json.dump(song2id, f)\n",
    "\n",
    "with open('user2id.json', 'w') as f:\n",
    "    json.dump(user2id, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Select 5% of the songs for out-of-matrix prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# The first 95% of the shuffled song ids stay in-matrix; the remaining 5% are held out.\n",
    "split_at = int(0.95 * n_songs)\n",
    "in_sid, out_sid = unique_sid[:split_at], unique_sid[split_at:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4871,)\n"
     ]
    }
   ],
   "source": [
    "# Number of held-out (out-of-matrix) songs. Parenthesised so the statement is\n",
    "# valid on both Python 2 and Python 3.\n",
    "print(out_sid.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBFNSP12AF72A0E22</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SONRXOY12AB0181E84</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOXRXDG12A8C131DE5</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>110     </th>\n",
       "      <td> 85c1f87fea955d09b4bec2e36aee110927aedf9a</td>\n",
       "      <td> SOHANDU12A8C13C47F</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120     </th>\n",
       "      <td> 85c1f87fea955d09b4bec2e36aee110927aedf9a</td>\n",
       "      <td> SOTVFEF12AF729E6CE</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123     </th>\n",
       "      <td> 85c1f87fea955d09b4bec2e36aee110927aedf9a</td>\n",
       "      <td> SOUSQUG12A8C13616F</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158     </th>\n",
       "      <td> 969cc6fb74e076a68e36a04409cb9d3765757508</td>\n",
       "      <td> SOGFUFC12A8C13F1E5</td>\n",
       "      <td>  6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOCBRSN12AAF3B30A6</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>268     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOCSISN12AF72AB1DE</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOEWYLX12A6D4F8E5F</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>276     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOFNCRW12A6D4F727B</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>322     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOQXKUV12A6D4FB4C9</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>339     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOUZRCP12AB0182164</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>342     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOVMCAR12AF72A1268</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>348     </th>\n",
       "      <td> b64cdd1a0bd907e5e00b39e345194768e330d652</td>\n",
       "      <td> SOWSWTD12A67ADA3D7</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366     </th>\n",
       "      <td> 17aa9f6dbdf753831da8f38c71b66b64373de613</td>\n",
       "      <td> SOBDVAK12AC90759A2</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>388     </th>\n",
       "      <td> 17aa9f6dbdf753831da8f38c71b66b64373de613</td>\n",
       "      <td> SOEOJHS12AB017F3DC</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>393     </th>\n",
       "      <td> 17aa9f6dbdf753831da8f38c71b66b64373de613</td>\n",
       "      <td> SOFKYDZ12AB017F425</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>423     </th>\n",
       "      <td> 17aa9f6dbdf753831da8f38c71b66b64373de613</td>\n",
       "      <td> SOJITNW12A8C13D951</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>510     </th>\n",
       "      <td> d6589314c0a9bcbca4fee0c93b14bc402363afea</td>\n",
       "      <td> SODLSCE12A6D4FBCAC</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>559     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOAFOBL12AF72A25BA</td>\n",
       "      <td> 12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>569     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOAOFBI12A8C143E28</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>587     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOBOJJB12A58A7D1AD</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>595     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOCBNIS12AF72AB9D3</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOCEWVG12A8C13DCC2</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>669     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOGIEOU12A8C134815</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>677     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOGVWGI12A8C13B9D1</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>683     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOHBURV12A8C13B628</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>687     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOHIDCT12AB018C98E</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>691     </th>\n",
       "      <td> 5a905f000fc1ff3df7ca807d57edb608863db05d</td>\n",
       "      <td> SOHWBGO12A6D4FA87A</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372670</th>\n",
       "      <td> 3b91968ca65411893d356bb96e7cce1e3fe8f764</td>\n",
       "      <td> SOKPWKZ12AB0182223</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372676</th>\n",
       "      <td> 3b91968ca65411893d356bb96e7cce1e3fe8f764</td>\n",
       "      <td> SOLWHDY12A6310DFE5</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372697</th>\n",
       "      <td> 3b91968ca65411893d356bb96e7cce1e3fe8f764</td>\n",
       "      <td> SOPFJGB12A6702166F</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372731</th>\n",
       "      <td> 3b91968ca65411893d356bb96e7cce1e3fe8f764</td>\n",
       "      <td> SOVCNHZ12AB0180982</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372867</th>\n",
       "      <td> 67b00a32e0d314eb059016a24864d5e7ee8219b8</td>\n",
       "      <td> SOGOPZL12A8C13CC50</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372886</th>\n",
       "      <td> 67b00a32e0d314eb059016a24864d5e7ee8219b8</td>\n",
       "      <td> SOQARDA12B0B809080</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48372921</th>\n",
       "      <td> 69e03764ed3bb92a765bd73ef273fcc479f63754</td>\n",
       "      <td> SONGTTS12A6701E59A</td>\n",
       "      <td> 13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373009</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOARLEM12AF729FFB8</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373072</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOMCAFM12A58A7B024</td>\n",
       "      <td> 11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373077</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOMUENG12A8C1442F3</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373085</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOOAFDW12A8C13325B</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373091</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOOVJTE12A8C132892</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373096</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOOZFCC12A58A7D783</td>\n",
       "      <td> 14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373115</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOSKFED12A8C1343AB</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373123</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOTGZIH12A8C1428A5</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373124</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOTKNTF12A8C144A0D</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373139</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOVBRCP12A6701D7B5</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373144</th>\n",
       "      <td> 4d5b26d3f618ce63ba018fe34d57e71f1b8e2184</td>\n",
       "      <td> SOWIGII12A58A7A939</td>\n",
       "      <td> 16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373241</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SODLAPJ12A8C142002</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373254</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOEWYLX12A6D4F8E5F</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373324</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOLRGVL12A8C143BC3</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373329</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOMCAFM12A58A7B024</td>\n",
       "      <td>  6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373356</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOPREHY12AB01815F9</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373377</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOSCIZP12AB0181D2F</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373396</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOTKYBW12A8C13C3EA</td>\n",
       "      <td>  3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373428</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOVWADY12AB0189C63</td>\n",
       "      <td>  5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373442</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOWYYUQ12A6701D68D</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373451</th>\n",
       "      <td> 8305c896f42308824da7d4386f4b9ee584281412</td>\n",
       "      <td> SOXZSEH12AC468CABB</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373548</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SODJQXO12A6D4F697D</td>\n",
       "      <td>  2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373549</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOEISDE12A8AE4632E</td>\n",
       "      <td>  1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1922113 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               uid                 sid  count\n",
       "3         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBFNSP12AF72A0E22      1\n",
       "50        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SONRXOY12AB0181E84      1\n",
       "96        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOXRXDG12A8C131DE5      1\n",
       "110       85c1f87fea955d09b4bec2e36aee110927aedf9a  SOHANDU12A8C13C47F      1\n",
       "120       85c1f87fea955d09b4bec2e36aee110927aedf9a  SOTVFEF12AF729E6CE      3\n",
       "123       85c1f87fea955d09b4bec2e36aee110927aedf9a  SOUSQUG12A8C13616F      2\n",
       "158       969cc6fb74e076a68e36a04409cb9d3765757508  SOGFUFC12A8C13F1E5      6\n",
       "266       b64cdd1a0bd907e5e00b39e345194768e330d652  SOCBRSN12AAF3B30A6      2\n",
       "268       b64cdd1a0bd907e5e00b39e345194768e330d652  SOCSISN12AF72AB1DE      2\n",
       "275       b64cdd1a0bd907e5e00b39e345194768e330d652  SOEWYLX12A6D4F8E5F      3\n",
       "276       b64cdd1a0bd907e5e00b39e345194768e330d652  SOFNCRW12A6D4F727B      2\n",
       "322       b64cdd1a0bd907e5e00b39e345194768e330d652  SOQXKUV12A6D4FB4C9      1\n",
       "339       b64cdd1a0bd907e5e00b39e345194768e330d652  SOUZRCP12AB0182164      1\n",
       "342       b64cdd1a0bd907e5e00b39e345194768e330d652  SOVMCAR12AF72A1268      1\n",
       "348       b64cdd1a0bd907e5e00b39e345194768e330d652  SOWSWTD12A67ADA3D7      1\n",
       "366       17aa9f6dbdf753831da8f38c71b66b64373de613  SOBDVAK12AC90759A2      1\n",
       "388       17aa9f6dbdf753831da8f38c71b66b64373de613  SOEOJHS12AB017F3DC      2\n",
       "393       17aa9f6dbdf753831da8f38c71b66b64373de613  SOFKYDZ12AB017F425      1\n",
       "423       17aa9f6dbdf753831da8f38c71b66b64373de613  SOJITNW12A8C13D951      2\n",
       "510       d6589314c0a9bcbca4fee0c93b14bc402363afea  SODLSCE12A6D4FBCAC      1\n",
       "559       5a905f000fc1ff3df7ca807d57edb608863db05d  SOAFOBL12AF72A25BA     12\n",
       "569       5a905f000fc1ff3df7ca807d57edb608863db05d  SOAOFBI12A8C143E28      1\n",
       "587       5a905f000fc1ff3df7ca807d57edb608863db05d  SOBOJJB12A58A7D1AD      3\n",
       "595       5a905f000fc1ff3df7ca807d57edb608863db05d  SOCBNIS12AF72AB9D3      2\n",
       "596       5a905f000fc1ff3df7ca807d57edb608863db05d  SOCEWVG12A8C13DCC2      1\n",
       "669       5a905f000fc1ff3df7ca807d57edb608863db05d  SOGIEOU12A8C134815      1\n",
       "677       5a905f000fc1ff3df7ca807d57edb608863db05d  SOGVWGI12A8C13B9D1      1\n",
       "683       5a905f000fc1ff3df7ca807d57edb608863db05d  SOHBURV12A8C13B628      1\n",
       "687       5a905f000fc1ff3df7ca807d57edb608863db05d  SOHIDCT12AB018C98E      1\n",
       "691       5a905f000fc1ff3df7ca807d57edb608863db05d  SOHWBGO12A6D4FA87A      2\n",
       "...                                            ...                 ...    ...\n",
       "48372670  3b91968ca65411893d356bb96e7cce1e3fe8f764  SOKPWKZ12AB0182223      1\n",
       "48372676  3b91968ca65411893d356bb96e7cce1e3fe8f764  SOLWHDY12A6310DFE5      1\n",
       "48372697  3b91968ca65411893d356bb96e7cce1e3fe8f764  SOPFJGB12A6702166F      3\n",
       "48372731  3b91968ca65411893d356bb96e7cce1e3fe8f764  SOVCNHZ12AB0180982      1\n",
       "48372867  67b00a32e0d314eb059016a24864d5e7ee8219b8  SOGOPZL12A8C13CC50      1\n",
       "48372886  67b00a32e0d314eb059016a24864d5e7ee8219b8  SOQARDA12B0B809080      1\n",
       "48372921  69e03764ed3bb92a765bd73ef273fcc479f63754  SONGTTS12A6701E59A     13\n",
       "48373009  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOARLEM12AF729FFB8      1\n",
       "48373072  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOMCAFM12A58A7B024     11\n",
       "48373077  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOMUENG12A8C1442F3      2\n",
       "48373085  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOOAFDW12A8C13325B      2\n",
       "48373091  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOOVJTE12A8C132892      1\n",
       "48373096  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOOZFCC12A58A7D783     14\n",
       "48373115  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOSKFED12A8C1343AB      1\n",
       "48373123  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOTGZIH12A8C1428A5      2\n",
       "48373124  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOTKNTF12A8C144A0D      2\n",
       "48373139  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOVBRCP12A6701D7B5      2\n",
       "48373144  4d5b26d3f618ce63ba018fe34d57e71f1b8e2184  SOWIGII12A58A7A939     16\n",
       "48373241  8305c896f42308824da7d4386f4b9ee584281412  SODLAPJ12A8C142002      2\n",
       "48373254  8305c896f42308824da7d4386f4b9ee584281412  SOEWYLX12A6D4F8E5F      1\n",
       "48373324  8305c896f42308824da7d4386f4b9ee584281412  SOLRGVL12A8C143BC3      1\n",
       "48373329  8305c896f42308824da7d4386f4b9ee584281412  SOMCAFM12A58A7B024      6\n",
       "48373356  8305c896f42308824da7d4386f4b9ee584281412  SOPREHY12AB01815F9      3\n",
       "48373377  8305c896f42308824da7d4386f4b9ee584281412  SOSCIZP12AB0181D2F      1\n",
       "48373396  8305c896f42308824da7d4386f4b9ee584281412  SOTKYBW12A8C13C3EA      3\n",
       "48373428  8305c896f42308824da7d4386f4b9ee584281412  SOVWADY12AB0189C63      5\n",
       "48373442  8305c896f42308824da7d4386f4b9ee584281412  SOWYYUQ12A6701D68D      1\n",
       "48373451  8305c896f42308824da7d4386f4b9ee584281412  SOXZSEH12AC468CABB      1\n",
       "48373548  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SODJQXO12A6D4F697D      2\n",
       "48373549  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOEISDE12A8AE4632E      1\n",
       "\n",
       "[1922113 rows x 3 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_tp = tp[tp['sid'].isin(out_sid)]\n",
    "out_tp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>uid</th>\n",
       "      <th>sid</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOAKIMP12A8C130995</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOAPDEY12A81C210A9</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBBMDR12A8C13253B</td>\n",
       "      <td> 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBFOVM12A58A7D494</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBSUJE12A6D4F8CF5</td>\n",
       "      <td> 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBVFZR12A6D4F8AE3</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8       </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBXALG12A8C13C108</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOBYHAJ12A6701BF1D</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOCNMUH12A6D4F6E6D</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SODACBL12A8C13C273</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SODDNQT12A6D4F5F7E</td>\n",
       "      <td> 5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SODXRTY12AB0180F3B</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SODZWFT12A8C13C0E4</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOEGVZY12A58A7857E</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOEOBYG12A6D4F8AE2</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOEWFWM12A8C1308BA</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOFFJPX12A6D4F7456</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOFGUAY12AB017B0A8</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOFRQTD12A81C233C0</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOFZFQU12A8C13CAB8</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOGJAOS12A6D4F7459</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOHQIAG12A8C136F64</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOHQWYZ12A6D4FA701</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOHQZCA12A6D4FB317</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOIAOBY12A8C13BF75</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOIQOQT12A8C136F96</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOISWBZ12A8C13C0F7</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOIYTOA12A6D4F9A23</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOIZAZL12A6701C53B</td>\n",
       "      <td> 5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34      </th>\n",
       "      <td> b80344d063b5ccb3212f76538f3d9e43d87dca9e</td>\n",
       "      <td> SOJNNUA12A8AE48C7A</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373552</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOHHKGO12AC3DF57BF</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373553</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOHLLRP12A6701F2F4</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373554</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOHMACD12A6D4F9582</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373555</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOHYSXA12AB0186704</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373556</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOIBAQJ12AB0182643</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373557</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOJGZXL12A6D4F2980</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373558</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOJZZQW12A6702028B</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373560</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOMAKIT12A58A7E292</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373561</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SONWLIS12A8C140865</td>\n",
       "      <td> 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373562</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOOAAGD12AB017BCDA</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373563</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOOENDM12A6D2281CB</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373564</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOOFYTN12A6D4F9B35</td>\n",
       "      <td> 4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373566</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOPCTBB12AF72A1B64</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373567</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOPJLFV12A6701C797</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373568</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOPUELG12A6701D215</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373569</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOQBOWE12A8C13CC2E</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373570</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SORFHOZ12A6701E129</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373571</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SORPVUD12A67020454</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373572</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOSCPOI12A8C139F02</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373573</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOSKDTM12A6701C795</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373574</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOSQJWM12A6D4F79E0</td>\n",
       "      <td> 6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373576</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOTFOAE12A6D4F4511</td>\n",
       "      <td> 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373577</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOTIXTZ12AF72A39AC</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373579</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOTULEI12A58A7CB72</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373580</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOTYMDI12A6D4F867D</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373581</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOUHHHH12AF729E4AF</td>\n",
       "      <td> 2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373582</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOUJVIT12A8C1451C1</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373583</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOUSMXX12AB0185C24</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373584</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOWYSKH12AF72A303A</td>\n",
       "      <td> 3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48373585</th>\n",
       "      <td> b7815dbb206eb2831ce0fe040d0aa537e2e800f7</td>\n",
       "      <td> SOYYFLV12A58A7A88F</td>\n",
       "      <td> 1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>36304189 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               uid                 sid  count\n",
       "0         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAKIMP12A8C130995      1\n",
       "1         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAPDEY12A81C210A9      1\n",
       "2         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBBMDR12A8C13253B      2\n",
       "4         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBFOVM12A58A7D494      1\n",
       "6         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBSUJE12A6D4F8CF5      2\n",
       "7         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBVFZR12A6D4F8AE3      1\n",
       "8         b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBXALG12A8C13C108      1\n",
       "10        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBYHAJ12A6701BF1D      1\n",
       "11        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOCNMUH12A6D4F6E6D      1\n",
       "12        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODACBL12A8C13C273      1\n",
       "13        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODDNQT12A6D4F5F7E      5\n",
       "14        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODXRTY12AB0180F3B      1\n",
       "15        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODZWFT12A8C13C0E4      1\n",
       "16        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOEGVZY12A58A7857E      1\n",
       "18        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOEOBYG12A6D4F8AE2      1\n",
       "19        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOEWFWM12A8C1308BA      1\n",
       "20        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFFJPX12A6D4F7456      1\n",
       "21        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFGUAY12AB017B0A8      1\n",
       "22        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFRQTD12A81C233C0      1\n",
       "24        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOFZFQU12A8C13CAB8      1\n",
       "25        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOGJAOS12A6D4F7459      1\n",
       "26        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOHQIAG12A8C136F64      1\n",
       "27        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOHQWYZ12A6D4FA701      1\n",
       "28        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOHQZCA12A6D4FB317      1\n",
       "29        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOIAOBY12A8C13BF75      1\n",
       "30        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOIQOQT12A8C136F96      1\n",
       "31        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOISWBZ12A8C13C0F7      1\n",
       "32        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOIYTOA12A6D4F9A23      1\n",
       "33        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOIZAZL12A6701C53B      5\n",
       "34        b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOJNNUA12A8AE48C7A      1\n",
       "...                                            ...                 ...    ...\n",
       "48373552  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOHHKGO12AC3DF57BF      1\n",
       "48373553  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOHLLRP12A6701F2F4      1\n",
       "48373554  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOHMACD12A6D4F9582      1\n",
       "48373555  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOHYSXA12AB0186704      1\n",
       "48373556  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOIBAQJ12AB0182643      1\n",
       "48373557  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOJGZXL12A6D4F2980      1\n",
       "48373558  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOJZZQW12A6702028B      1\n",
       "48373560  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOMAKIT12A58A7E292      1\n",
       "48373561  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SONWLIS12A8C140865      2\n",
       "48373562  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOOAAGD12AB017BCDA      1\n",
       "48373563  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOOENDM12A6D2281CB      1\n",
       "48373564  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOOFYTN12A6D4F9B35      4\n",
       "48373566  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOPCTBB12AF72A1B64      1\n",
       "48373567  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOPJLFV12A6701C797      1\n",
       "48373568  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOPUELG12A6701D215      1\n",
       "48373569  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOQBOWE12A8C13CC2E      1\n",
       "48373570  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SORFHOZ12A6701E129      1\n",
       "48373571  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SORPVUD12A67020454      1\n",
       "48373572  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOSCPOI12A8C139F02      1\n",
       "48373573  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOSKDTM12A6701C795      1\n",
       "48373574  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOSQJWM12A6D4F79E0      6\n",
       "48373576  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOTFOAE12A6D4F4511      2\n",
       "48373577  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOTIXTZ12AF72A39AC      1\n",
       "48373579  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOTULEI12A58A7CB72      1\n",
       "48373580  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOTYMDI12A6D4F867D      1\n",
       "48373581  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOUHHHH12AF729E4AF      2\n",
       "48373582  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOUJVIT12A8C1451C1      1\n",
       "48373583  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOUSMXX12AB0185C24      1\n",
       "48373584  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOWYSKH12AF72A303A      3\n",
       "48373585  b7815dbb206eb2831ce0fe040d0aa537e2e800f7  SOYYFLV12A58A7A88F      1\n",
       "\n",
       "[36304189 rows x 3 columns]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "in_tp = tp[~tp['sid'].isin(out_sid)]\n",
    "in_tp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate train/test/vad sets"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pick out 20% of the rating for in-matrix prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(12345)\n",
    "n_ratings = in_tp.shape[0]\n",
    "test = np.random.choice(n_ratings, size=int(0.20 * n_ratings), replace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test_idx = np.zeros(n_ratings, dtype=bool)\n",
    "test_idx[test] = True\n",
    "\n",
    "test_tp = in_tp[test_idx]\n",
    "train_tp = in_tp[~test_idx]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure there is no empty row or column in the training data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "613682\n",
      "613682\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(train_tp['uid']))\n",
    "print len(pd.unique(in_tp['uid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "92543\n",
      "92543\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(train_tp['sid']))\n",
    "print len(pd.unique(in_tp['sid']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pick out 10% of the training rating as validation set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(13579)\n",
    "n_ratings = train_tp.shape[0]\n",
    "vad = np.random.choice(n_ratings, size=int(0.10 * n_ratings), replace=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vad_idx = np.zeros(n_ratings, dtype=bool)\n",
    "vad_idx[vad] = True\n",
    "\n",
    "vad_tp = train_tp[vad_idx]\n",
    "train_tp = train_tp[~vad_idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "613682\n",
      "613682\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(train_tp['uid']))\n",
    "print len(pd.unique(in_tp['uid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "92543\n",
      "92543\n"
     ]
    }
   ],
   "source": [
    "print len(pd.unique(train_tp['sid']))\n",
    "print len(pd.unique(in_tp['sid']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "test_tp.to_csv('in.test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "train_tp.to_csv('in.train.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vad_tp.to_csv('in.vad.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "out_tp.to_csv('out.test.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
